[Openmp-commits] [openmp] r205909 - Add the offload directory which contains the code needed to support

Jim Cownie james.h.cownie at intel.com
Wed Apr 9 08:40:24 PDT 2014


Added: openmp/trunk/offload/doc/doxygen/config
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/doc/doxygen/config?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/doc/doxygen/config (added)
+++ openmp/trunk/offload/doc/doxygen/config Wed Apr  9 10:40:23 2014
@@ -0,0 +1,2328 @@
+# Doxyfile 1.8.6
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
+# for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "Intel® Offload Runtime Library"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         = 
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          = 
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is included in
+# the documentation. The maximum height of the logo should not exceed 55 pixels
+# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
+# to the output directory.
+
+PROJECT_LOGO           = 
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doc/doxygen/generated
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
+# description of a member or function before the detailed description.
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity): The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
+# before file names in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used.
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        = src/
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    = src/
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that Rational Rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
+# new page for each member. If set to NO, the documentation of a member will be
+# part of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines.
+
+ALIASES                = 
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST              = 
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING      = 
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word
+# or globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match function declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in the
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE and EXTRACT_STATIC tags, respectively, are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespaces
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO these classes will be included in the various overviews. This option has
+# no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If YES, doxygen adds an include statement to each grouped member's docs.
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the
+# todo list. This list is created by putting \todo commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the
+# test list. This list is created by putting \test commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       = 
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES the list
+# will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command "command input-file", where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER    = 
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            = 
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. Do not use file names with spaces, bibtex cannot handle them. See
+# also \cite for info how to create references.
+
+CITE_BIB_FILES         = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO doxygen will only warn about wrong or incomplete parameter
+# documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = src
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank the
+# following patterns are tested: *.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
+# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
+# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
+# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
+# *.qsf, *.as and *.js.
+
+FILE_PATTERNS          = *.c *.h *.cpp *.f90
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                = src/imported src/rdtsc.h
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       = 
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             = 
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER           = 
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS = 
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see http://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the
+# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
+# cost of reduced performance. This can be particularly helpful with template
+# rich C++ code for which doxygen's built-in parser lacks the necessary type
+# information.
+# Note: The availability of this option depends on whether or not doxygen was
+# compiled with the --with-libclang option.
+# The default value is: NO.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command
+# line options that you would normally use when invoking the compiler. Note that
+# the include paths will already be set by doxygen for the files and directories
+# specified with INPUT and INCLUDE_PATH.
+# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+
+CLANG_OPTIONS          = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output.
+# The default value is: YES.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML the header file must include any scripts and style sheets
+# that doxygen needs, which depend on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        = 
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user-
+# defined cascading style sheet that is included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet file to the output directory. For an example
+# see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  = 
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       = 
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the stylesheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# is purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special
+# value representing an infinite number of entries and will result in a fully
+# expanded tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: http://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               = 
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           = 
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     = 
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               = 
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when
+# generating Qt Help Project output. For more information please see Qt Help
+# Project / Virtual Folders (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   = 
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  = 
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  = 
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           = 
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using prerendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want formulas to look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     = 
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       = 
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavours of web server based searching depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools. See
+# the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       = 
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     = 
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps each id
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. To get the times font for
+# instance you can specify
+# EXTRA_PACKAGES=times
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will
+# replace them by respectively the title of the page, the current date and time,
+# only the current date, the version number of doxygen, the project name (see
+# PROJECT_NAME), or the project number (see PROJECT_NUMBER).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           = doc/doxygen/header.tex
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           = 
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    = 
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify a XML schema, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_SCHEMA             = 
+
+# The XML_DTD tag can be used to specify a XML DTD, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_DTD                = 
+
+# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
+# Definitions (see http://autogen.sf.net) file that captures the structure of
+# the code including all documentation. Note that this feature is still
+# experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
+# in the source code. If set to NO only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+#
+# Note: entries are bare macro names, not compiler flags, so they must not
+# carry a -D prefix.
+
+PREDEFINED             = COI_LIBRARY_VERSION=2 MYO_SUPPORT OFFLOAD_DEBUG=1 SEP_SUPPORT TIMING_SUPPORT
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      = 
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have an
+# all uppercase name, and do not end with a semicolon. Such function macros are
+# typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the
+# class index. If set to NO only the inherited external classes will be listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
+# the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            = 
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               = 
+
+# If set to YES, the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO.
+# The default value is: NO.
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           = 
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot.
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif and svg.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               = 
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           = 
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           = 
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           = 
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lie
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP            = YES

Propchange: openmp/trunk/offload/doc/doxygen/config
------------------------------------------------------------------------------
    svn:executable = *

Added: openmp/trunk/offload/doc/doxygen/header.tex
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/doc/doxygen/header.tex?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/doc/doxygen/header.tex (added)
+++ openmp/trunk/offload/doc/doxygen/header.tex Wed Apr  9 10:40:23 2014
@@ -0,0 +1,90 @@
+% Latex header for doxygen 1.8.3.1
+\documentclass{book}
+\usepackage[a4paper,top=2.5cm,bottom=2.5cm,left=2.5cm,right=2.5cm]{geometry}
+\usepackage{makeidx}
+\usepackage{natbib}
+\usepackage{graphicx}
+\usepackage{multicol}
+\usepackage{float}
+\usepackage{listings}
+\usepackage{color}
+\usepackage{ifthen}
+\usepackage[table]{xcolor}
+\usepackage{textcomp}
+\usepackage{alltt}
+\usepackage{ifpdf}
+\ifpdf
+\usepackage[pdftex,
+            pagebackref=true,
+            colorlinks=true,
+            linkcolor=blue,
+            unicode
+           ]{hyperref}
+\else
+\usepackage[ps2pdf,
+            pagebackref=true,
+            colorlinks=true,
+            linkcolor=blue,
+            unicode
+           ]{hyperref}
+\usepackage{pspicture}
+\fi
+\usepackage[utf8]{inputenc}
+\usepackage{mathptmx}
+\usepackage[scaled=.90]{helvet}
+\usepackage{courier}
+\usepackage{sectsty}
+\usepackage{amssymb}
+\usepackage[titles]{tocloft}
+\usepackage{doxygen}
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\lstset{language=C++,inputencoding=utf8,basicstyle=\footnotesize,breaklines=true,breakatwhitespace=true,tabsize=4,numbers=left }
+\makeindex
+\setcounter{tocdepth}{3}
+\renewcommand{\footrulewidth}{0.4pt}
+\renewcommand{\familydefault}{\sfdefault}
+\hfuzz=15pt
+\setlength{\emergencystretch}{15pt}
+\hbadness=750
+\tolerance=750
+\begin{document}
+\hypersetup{pageanchor=false,citecolor=blue}
+\begin{titlepage}
+\vspace*{7cm}
+\begin{center}
+{\Large Intel\textsuperscript{\textregistered} Offload Runtime Library }\\
+\vspace*{1cm}
+{\large Generated by Doxygen $doxygenversion }\\
+\vspace*{0.5cm}
+{\small $datetime }\\
+\end{center}
+\end{titlepage}
+
+{\bf FTC Optimization Notice}
+
+Intel's compilers may or may not optimize to the same degree for non-Intel microprocessors for
+optimizations that are not unique to Intel microprocessors. These optimizations include SSE2,
+SSE3, and SSSE3 instruction sets and other optimizations. Intel does not guarantee the
+availability, functionality, or effectiveness of any optimization on microprocessors not
+manufactured by Intel.
+
+Microprocessor-dependent optimizations in this product are intended for use with Intel
+microprocessors. Certain optimizations not specific to Intel microarchitecture are reserved for
+Intel microprocessors. Please refer to the applicable product User and Reference Guides for
+more information regarding the specific instruction sets covered by this notice.
+
+Notice revision \#20110804
+
+\vspace*{0.5cm}
+
+{\bf Trademarks}
+
+Intel, Xeon, and Intel Xeon Phi are trademarks of Intel Corporation in the U.S. and/or other countries.
+
+This document is Copyright \textcopyright{} 2014, Intel Corporation. All rights reserved.
+
+\pagenumbering{roman}
+\tableofcontents
+\pagenumbering{arabic}
+\hypersetup{pageanchor=true,citecolor=blue}

Propchange: openmp/trunk/offload/doc/doxygen/header.tex
------------------------------------------------------------------------------
    svn:executable = *

Added: openmp/trunk/offload/src/cean_util.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/cean_util.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/cean_util.cpp (added)
+++ openmp/trunk/offload/src/cean_util.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,344 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "cean_util.h"
+#include "offload_common.h"
+
+// Allocates a CeanReadRanges object and initializes it so that the
+// contiguous ranges of the array section described by "ap" can be read one
+// after another with get_next_range().
+//
+// ap - array descriptor of the section
+//
+// Returns the malloc'ed object, or NULL when the allocation fails. The
+// code below never frees the result; releasing it is left to the caller.
+CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
+{
+    CeanReadRanges * res;
+
+    // Find the max contiguous range: walk from the last dimension towards
+    // the first while the dimension has unit stride and the section
+    // accumulated so far fills one element of the next outer dimension.
+    int64_t rank = ap->rank - 1;
+    int64_t length = ap->dim[rank].size;
+    for (; rank >= 0; rank--) {
+        if (ap->dim[rank].stride == 1) {
+            length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
+            if (rank > 0 && length != ap->dim[rank - 1].size) {
+                break;
+            }
+        }
+        else {
+            break;
+        }
+    }
+
+    // Entries Dim[0] .. Dim[rank] are written below and read by
+    // get_next_range(), i.e. up to ap->rank entries. The previous size,
+    // (ap->rank - rank) extra entries, was too small whenever "rank" stayed
+    // high (e.g. a strided last dimension of a 3-D section), which overran
+    // the allocation. (ap->rank + 1) extra entries is never less than the
+    // old size and always covers Dim[0..rank].
+    res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
+                                  (ap->rank + 1) * sizeof(CeanReadDim));
+    if (res == NULL) {
+        return NULL;
+    }
+    res->current_number = 0;
+    res->range_size = length;
+    res->last_noncont_ind = rank;
+
+    // calculate number of contiguous ranges inside noncontiguous dimensions
+    int count = 1;
+    bool prev_is_cont = true;
+    int64_t offset = 0;
+
+    for (; rank >= 0; rank--) {
+        // count = number of ranges addressed by one step of this dimension
+        res->Dim[rank].count = count;
+        // distance between two successive selected indices
+        res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
+        // The first visited dimension, if it has unit stride, is already
+        // part of the contiguous range and contributes no extra ranges.
+        count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
+            (ap->dim[rank].upper - ap->dim[rank].lower +
+            ap->dim[rank].stride) / ap->dim[rank].stride);
+        prev_is_cont = false;
+        offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
+                 ap->dim[rank].size;
+    }
+    res->range_max_number = count;
+    res -> ptr = (void*)ap->base;
+    res -> init_offset = offset;
+    return res;
+}
+
+// Reports whether the ranges described by the first argument can be
+// transferred into the ranges described by the second one. A NULL
+// descriptor on either side always matches; otherwise one range size has
+// to be a whole multiple of the other.
+bool cean_ranges_match(
+    CeanReadRanges * read_rng1,
+    CeanReadRanges * read_rng2
+)
+{
+    // a missing descriptor imposes no constraint
+    if (read_rng1 == NULL || read_rng2 == NULL) {
+        return true;
+    }
+
+    if (read_rng1->range_size % read_rng2->range_size == 0) {
+        return true;
+    }
+    return read_rng2->range_size % read_rng1->range_size == 0;
+}
+
+// Advances "read_rng" to its next contiguous range and stores the offset
+// of that range in "*offset".
+// Returns true when a range was produced. Returns false, after resetting
+// the range counter to zero, once all ranges have been delivered.
+bool get_next_range(
+    CeanReadRanges * read_rng,
+    int64_t *offset
+)
+{
+    read_rng->current_number++;
+    if (read_rng->current_number > read_rng->range_max_number) {
+        read_rng->current_number = 0;
+        return false;
+    }
+
+    // Decompose the zero-based range number into one step count per
+    // dimension and sum up the corresponding distances.
+    int remaining = read_rng->current_number - 1;
+    int64_t total = read_rng->init_offset;
+    for (int d = 0; d <= read_rng->last_noncont_ind; d++) {
+        const CeanReadDim &entry = read_rng->Dim[d];
+        int steps = remaining / entry.count;
+        total += steps * entry.size;
+        remaining = remaining % entry.count;
+    }
+    *offset = total;
+    return true;
+}
+
+// Reports whether the array section described by "ap" is contiguous.
+//
+// Dimensions are examined from the last one to the first while "length"
+// accumulates size * (upper - lower + 1) of the dimensions already passed.
+// The result is false when a dimension with stride > 1 selects more than
+// one index, or when the accumulated length stops matching a dimension's
+// size and that dimension or any earlier one selects more than one index.
+bool is_arr_desc_contiguous(const arr_desc *ap)
+{
+    int64_t rank = ap->rank - 1;
+    int64_t length = ap->dim[rank].size;
+    for (; rank >= 0; rank--) {
+        // a strided dimension is acceptable only if it selects one index
+        if (ap->dim[rank].stride > 1 &&
+            ap->dim[rank].upper - ap->dim[rank].lower != 0) {
+                return false;
+        }
+        else if (length != ap->dim[rank].size) {
+            // The section accumulated so far does not match this
+            // dimension's size: this dimension and every remaining one
+            // must select exactly one index (upper == lower).
+            for (; rank >= 0; rank--) {
+                if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        // extend the accumulated section by this dimension's extent
+        length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
+    }
+    return true;
+}
+
+// Returns the total transfer size for "read_rng": the number of contiguous
+// ranges multiplied by the size of one range.
+int64_t cean_get_transf_size(CeanReadRanges * read_rng)
+{
+    const int64_t n_ranges = read_rng->range_max_number;
+    const int64_t one_range = read_rng->range_size;
+    return n_ranges * one_range;
+}
+
+// Left and right bounds of the range currently being accumulated by
+// generate_one_range(); generate_mem_ranges() resets them to -1 / -2
+// before each traversal.
+static uint64_t last_left, last_right;
+// Callback that receives one merged range [low, high] together with the
+// indentation prefix and the element size.
+typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize);
+
+// Feeds the range [lrange, rrange] into the accumulator kept in
+// last_left / last_right. A range that starts right after the accumulated
+// one merely extends it; otherwise the accumulated range is handed to "fp"
+// and a new one is started. The very first range (last_left == -1) only
+// initializes the accumulator.
+static void generate_one_range(
+    const char *spaces,
+    uint64_t lrange,
+    uint64_t rrange,
+    fpp fp,
+    int esize
+)
+{
+    OFFLOAD_TRACE(3,
+        "%s    generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
+        spaces, (void*)lrange, (void*)rrange, esize);
+
+    const bool is_first = (last_left == -1);
+    if (is_first) {
+        last_left = lrange;
+    }
+    else if (lrange != last_right+1) {
+        // Not adjacent to the accumulated range: report it, start anew
+        (*fp)(spaces, last_left, last_right, esize);
+        last_left = lrange;
+    }
+    // In every case the accumulated range now ends at rrange
+    last_right = rrange;
+}
+
+// Recursively walks the dimensions of an array section and passes every
+// contiguous address range it finds to generate_one_range().
+//
+// spaces - indentation prefix for trace output
+// base   - address the indices of this dimension are relative to
+// rank   - number of dimensions still to process, "ddp" included
+// ddp    - descriptor of the current dimension; ddp+1 is the next inner one
+// fp     - callback forwarded to generate_one_range()
+// esize  - element size forwarded to the callback
+//
+// NOTE(review): the loops assume stride > 0; a zero stride would never
+// terminate - confirm that callers guarantee it.
+static void generate_mem_ranges_one_rank(
+    const char *spaces,
+    uint64_t base,
+    uint64_t rank,
+    const struct dim_desc *ddp,
+    fpp fp,
+    int esize
+)
+{
+    uint64_t lindex = ddp->lindex;
+    uint64_t lower = ddp->lower;
+    uint64_t upper = ddp->upper;
+    uint64_t stride = ddp->stride;
+    uint64_t size = ddp->size;
+    OFFLOAD_TRACE(3,
+        "%s    "
+        "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
+        "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
+        spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
+
+    // Loop bounds are kept in signed 64-bit. The loops used to run an
+    // "int" index against unsigned 64-bit bounds, which truncated large
+    // indices and compared signed against unsigned values.
+    const int64_t first = ddp->lower - ddp->lindex;
+    const int64_t last = ddp->upper - ddp->lindex;
+    const int64_t step = ddp->stride;
+
+    if (rank == 1) {
+        uint64_t lrange, rrange;
+        if (stride == 1) {
+            // unit stride: the whole section of this dimension is one range
+            lrange = base + (lower-lindex)*size;
+            rrange = lrange + (upper-lower+1)*size - 1;
+            generate_one_range(spaces, lrange, rrange, fp, esize);
+        }
+        else {
+            // strided: every selected element is a range of its own
+            for (int64_t i = first; i <= last; i += step) {
+                lrange = base + i*size;
+                rrange = lrange + size - 1;
+                generate_one_range(spaces, lrange, rrange, fp, esize);
+            }
+        }
+    }
+    else {
+        // descend into the next inner dimension for every selected index
+        for (int64_t i = first; i <= last; i += step) {
+            generate_mem_ranges_one_rank(
+                spaces, base+i*size, rank-1, ddp+1, fp, esize);
+
+        }
+    }
+}
+
+// Enumerates the merged address ranges of the section described by "adp"
+// and hands each of them to "fp".
+//
+// spaces - indentation prefix for trace output
+// adp    - array descriptor
+// deref  - when true, adp->base is treated as the address of a pointer
+//          that is dereferenced to reach the data (c_cean_var_ptr);
+//          when false it is the address of the data itself (c_cean_var)
+// fp     - callback receiving every merged range
+static void generate_mem_ranges(
+    const char *spaces,
+    const arr_desc *adp,
+    bool deref,
+    fpp fp
+)
+{
+    OFFLOAD_TRACE(3,
+        "%s    "
+        "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
+        spaces, adp, deref);
+
+    // put the range accumulator into its "nothing seen yet" state
+    last_left = -1;
+    last_right = -2;
+
+    // Element size is derived from last dimension
+    const uint64_t elem_size = adp->dim[adp->rank-1].size;
+
+    uint64_t data_base;
+    if (deref) {
+        data_base = *((uint64_t*)(adp->base));
+    }
+    else {
+        data_base = adp->base;
+    }
+
+    generate_mem_ranges_one_rank(
+        spaces, data_base, adp->rank, &adp->dim[0], fp, elem_size);
+
+    // report the range that is still pending in the accumulator
+    (*fp)(spaces, last_left, last_right, elem_size);
+}
+
+// Computes the offset and the length of the data to be transferred for
+// the array section described by "adp".
+//
+// adp    - array descriptor
+// offset - out: sum over the examined dimensions of
+//          (lower - lindex) * size
+// length - out: (dim[0].upper - dim[0].lindex + 1) * dim[0].size reduced
+//          by "offset" and by the gap accumulated from the right side
+void __arr_data_offset_and_length(
+    const arr_desc *adp,
+    int64_t &offset,
+    int64_t &length
+)
+{
+    int64_t rank = adp->rank - 1;
+    int64_t size = adp->dim[rank].size;
+    int64_t r_off = 0; // offset from right boundary
+
+    // Find the rightmost dimension which takes just part of its range.
+    // It is recognized by the size of the dimension to its left not being
+    // equal to the accumulated length of the section between the upper
+    // and lower boundaries.
+    while (rank > 0) {
+        size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
+        if (size != adp->dim[rank - 1].size) {
+            break;
+        }
+        rank--;
+    }
+
+    // gap in front of the section within the dimension found above
+    offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
+             adp->dim[rank].size;
+
+    // find gaps both from the left - offset and from the right - r_off
+    for (rank--; rank >= 0; rank--) {
+        offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
+                  adp->dim[rank].size;
+        // part of one element of this dimension that lies beyond the
+        // upper bound of the next inner dimension
+        r_off += adp->dim[rank].size -
+                 (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
+                 adp->dim[rank + 1].size;
+    }
+    length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
+             adp->dim[0].size - offset - r_off;
+}
+
+#if OFFLOAD_DEBUG > 0
+
+// Debug helper: traces the address range [low, high] and, when
+// console_enabled is at least 4, the values stored in that range, ten
+// values per trace line.
+//
+// spaces - indentation prefix for every trace line
+// low    - address of the first element of the range
+// high   - last address belonging to the range
+// esize  - element size: 1, 2 and 4 are read as char, short and int;
+//          any other value is read as 64-bit words
+void print_range(
+    const char *spaces,
+    uint64_t low,
+    uint64_t high,
+    int esize
+)
+{
+    // Ten values of at most 19 characters each fit easily into "buffer".
+    char buffer[1024];
+    char number[32];
+
+    OFFLOAD_TRACE(3, "%s        print_range(low=%p, high=%p, esize=%d)\n",
+        spaces, (void*)low, (void*)high, esize);
+
+    if (console_enabled < 4) {
+        return;
+    }
+    OFFLOAD_TRACE(4, "%s            values:\n", spaces);
+    int count = 0;
+    buffer[0] = '\0';
+    while (low <= high)
+    {
+        switch (esize)
+        {
+        case 1:
+            sprintf(number, "%d ", *((char *)low));
+            low += 1;
+            break;
+        case 2:
+            sprintf(number, "%d ", *((short *)low));
+            low += 2;
+            break;
+        case 4:
+            sprintf(number, "%d ", *((int *)low));
+            low += 4;
+            break;
+        default:
+            // A 64-bit value needs the "ll" length modifier; the former
+            // "%016x" expected an unsigned int, which is undefined
+            // behaviour for a uint64_t argument.
+            sprintf(number, "0x%016llx ",
+                    (unsigned long long)*((uint64_t *)low));
+            low += 8;
+            break;
+        }
+        strcat(buffer, number);
+        count++;
+        if (count == 10) {
+            // line is full: emit it and start a new one
+            OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer);
+            count = 0;
+            buffer[0] = '\0';
+        }
+    }
+    // emit the last, partially filled line
+    if (count != 0) {
+        OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer);
+    }
+}
+
+// Debug helper: traces the contents of the array descriptor "adp" - base,
+// rank and every dimension - followed by the memory ranges it covers.
+//
+// spaces - indentation prefix for every trace line
+// name   - label printed in front of the descriptor
+// adp    - descriptor to dump; only its address is traced when it is null
+// deref  - forwarded to generate_mem_ranges(): true when the base address
+//          has to be dereferenced to get to the data (c_cean_var_ptr)
+void __arr_desc_dump(
+    const char *spaces,
+    const char *name,
+    const arr_desc *adp,
+    bool deref
+)
+{
+    OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
+
+    if (adp == 0) {
+        return;
+    }
+
+    OFFLOAD_TRACE(2, "%s    base=%llx, rank=%lld\n",
+        spaces, adp->base, adp->rank);
+
+    for (int dim_no = 0; dim_no < adp->rank; dim_no++) {
+        const dim_desc &d = adp->dim[dim_no];
+        OFFLOAD_TRACE(2,
+                      "%s    dimension %d: size=%lld, lindex=%lld, "
+                      "lower=%lld, upper=%lld, stride=%lld\n",
+                      spaces, dim_no, d.size, d.lindex,
+                      d.lower, d.upper,
+                      d.stride);
+    }
+
+    // print the address ranges (and, at a higher trace level, the values)
+    generate_mem_ranges(spaces, adp, deref, &print_range);
+}
+#endif // OFFLOAD_DEBUG

Added: openmp/trunk/offload/src/cean_util.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/cean_util.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/cean_util.h (added)
+++ openmp/trunk/offload/src/cean_util.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,96 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef CEAN_UTIL_H_INCLUDED
+#define CEAN_UTIL_H_INCLUDED
+
+#include <stdint.h>
+
+// CEAN expression representation: description of one dimension of an
+// array section. Indices lower..upper, counted relative to lindex and
+// taken every "stride" steps, are selected; "size" is the distance between
+// two successive indices of this dimension.
+struct dim_desc {
+    int64_t size;       // Length of data type
+    int64_t lindex;     // Lower index
+    int64_t lower;      // Lower section bound
+    int64_t upper;      // Upper section bound
+    int64_t stride;     // Stride
+};
+
+// Array section descriptor: base address, rank and one dim_desc per
+// dimension. "dim" is declared with one element but is indexed up to
+// rank - 1, so the object is expected to be allocated with room for
+// "rank" entries.
+struct arr_desc {
+    int64_t base;       // Base address
+    int64_t rank;       // Rank of array
+    dim_desc dim[1];    // First of "rank" dimension descriptors
+};
+
+// Per-dimension data used by get_next_range() to turn a range number into
+// an offset.
+struct CeanReadDim {
+    int64_t count; // The number of elements in this dimension
+    int64_t size;  // The number of bytes between successive
+                   // elements in this dimension.
+};
+
+// State for reading the contiguous ranges of an array section one after
+// another; created by init_read_ranges_arr_desc() and advanced by
+// get_next_range().
+struct CeanReadRanges {
+    void *  ptr;              // base address taken from the descriptor
+    int64_t current_number;   // the number of ranges read
+    int64_t range_max_number; // number of contiguous ranges
+    int64_t range_size;       // size of max contiguous range
+    int     last_noncont_ind; // highest index used in the Dim array
+    int64_t init_offset;      // offset of 1-st element from array left bound
+    CeanReadDim Dim[1];       // first of last_noncont_ind + 1 entries
+};
+
+// array descriptor length for the given rank
+// NOTE(review): arr_desc starts with two int64_t members (base and rank)
+// but only one sizeof(int64_t) is counted here - confirm against the users
+// of this macro that this is intended.
+#define __arr_desc_length(rank) \
+    (sizeof(int64_t) + sizeof(dim_desc) * (rank))
+
+// returns offset and length of the data to be transferred
+void __arr_data_offset_and_length(const arr_desc *adp,
+                                  int64_t &offset,
+                                  int64_t &length);
+
+// determines whether the data array described by the argument is contiguous
+bool is_arr_desc_contiguous(const arr_desc *ap);
+
+// allocates an element of CeanReadRanges type, initialized to read
+// consecutively the contiguous ranges described by the "ap" argument
+CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap);
+
+// checks whether ranges described by the 1-st argument could be
+// transferred into ranges described by the 2-nd one
+bool cean_ranges_match(
+    CeanReadRanges * read_rng1,
+    CeanReadRanges * read_rng2
+);
+
+// first argument - value returned by a call to init_read_ranges_arr_desc.
+// returns true if the offset of the next range is set successfully.
+// returns false if the ranges are over.
+bool get_next_range(
+    CeanReadRanges * read_rng,
+    int64_t *offset
+);
+
+// returns number of transferred bytes
+int64_t cean_get_transf_size(CeanReadRanges * read_rng);
+
+#if OFFLOAD_DEBUG > 0
+// prints array descriptor contents to stderr
+void    __arr_desc_dump(
+    const char *spaces,
+    const char *name,
+    const arr_desc *adp,
+    bool dereference);
+#else
+// Without debug support the dump expands to nothing. The definition has to
+// stay on one logical line: a #define ends at the first newline that is
+// not preceded by a backslash, so the former multi-line parameter list
+// did not preprocess.
+#define __arr_desc_dump(spaces, name, adp, dereference)
+#endif // OFFLOAD_DEBUG
+
+#endif // CEAN_UTIL_H_INCLUDED

Added: openmp/trunk/offload/src/coi/coi_client.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/coi/coi_client.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/coi/coi_client.cpp (added)
+++ openmp/trunk/offload/src/coi/coi_client.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,350 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The COI host interface
+
+#include "coi_client.h"
+#include "../offload_common.h"
+
+namespace COI {
+
+#define COI_VERSION1    "COI_1.0"
+#define COI_VERSION2    "COI_2.0"
+
+bool            is_available;
+static void*    lib_handle;
+
+// pointers to functions from COI library
+COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+
+COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, const void*,
+                                     uint64_t, int, const char**, uint8_t,
+                                     const char**, uint8_t, const char*,
+                                     uint64_t, const char*, const char*,
+                                     uint64_t, COIPROCESS*);
+COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*);
+COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, const char**,
+                                       COIFUNCTION*);
+COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, const void*, uint64_t,
+                                          const char*, const char*,
+                                          const char*, uint64_t, uint32_t,
+                                          COILIBRARY*);
+COIRESULT (*ProcessRegisterLibraries)(uint32_t, const void**, const uint64_t*,
+                                      const char**, const uint64_t*);
+
+COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*);
+COIRESULT (*PipelineDestroy)(COIPIPELINE);
+COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION, uint32_t,
+                                 const COIBUFFER*, const COI_ACCESS_FLAGS*,
+                                 uint32_t, const COIEVENT*, const void*,
+                                 uint16_t, void*, uint16_t, COIEVENT*);
+
+COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*,
+                          uint32_t, const COIPROCESS*, COIBUFFER*);
+COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+                                    void*, uint32_t, const COIPROCESS*,
+                                    COIBUFFER*);
+COIRESULT (*BufferDestroy)(COIBUFFER);
+COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t,
+                       const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, void**);
+COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, uint64_t,
+                         COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE,
+                        uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
+                        COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+                            COI_BUFFER_MOVE_FLAG, uint32_t,
+                            const   COIEVENT*, COIEVENT*);
+
+COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*,
+                       uint32_t*);
+
+uint64_t  (*PerfGetCycleFrequency)(void);
+
+bool init(void)
+{
+#ifndef TARGET_WINNT
+    const char *lib_name = "libcoi_host.so.0";
+#else // TARGET_WINNT
+    const char *lib_name = "coi_host.dll";
+#endif // TARGET_WINNT
+    // Load the library first; every symbol lookup below needs lib_handle.
+    OFFLOAD_DEBUG_TRACE(2, "Loading COI library %s ...\n", lib_name);
+    lib_handle = DL_open(lib_name);
+    if (lib_handle == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to load the library\n");
+        return false;
+    }
+    // Bind each entry point; the first lookup failure calls fini() and bails.
+    EngineGetCount =
+        (COIRESULT (*)(COI_ISA_TYPE, uint32_t*))
+            DL_sym(lib_handle, "COIEngineGetCount", COI_VERSION1);
+    if (EngineGetCount == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIEngineGetCount");
+        fini();
+        return false;
+    }
+
+    EngineGetHandle =
+        (COIRESULT (*)(COI_ISA_TYPE, uint32_t, COIENGINE*))
+            DL_sym(lib_handle, "COIEngineGetHandle", COI_VERSION1);
+    if (EngineGetHandle == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIEngineGetHandle");
+        fini();
+        return false;
+    }
+
+    ProcessCreateFromMemory =
+        (COIRESULT (*)(COIENGINE, const char*, const void*, uint64_t, int,
+                       const char**, uint8_t, const char**, uint8_t,
+                       const char*, uint64_t, const char*, const char*,
+                       uint64_t, COIPROCESS*))
+            DL_sym(lib_handle, "COIProcessCreateFromMemory", COI_VERSION1);
+    if (ProcessCreateFromMemory == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIProcessCreateFromMemory");
+        fini();
+        return false;
+    }
+
+    ProcessDestroy =
+        (COIRESULT (*)(COIPROCESS, int32_t, uint8_t, int8_t*,
+                       uint32_t*))
+            DL_sym(lib_handle, "COIProcessDestroy", COI_VERSION1);
+    if (ProcessDestroy == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIProcessDestroy");
+        fini();
+        return false;
+    }
+
+    ProcessGetFunctionHandles =
+        (COIRESULT (*)(COIPROCESS, uint32_t, const char**, COIFUNCTION*))
+            DL_sym(lib_handle, "COIProcessGetFunctionHandles", COI_VERSION1);
+    if (ProcessGetFunctionHandles == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIProcessGetFunctionHandles");
+        fini();
+        return false;
+    }
+
+    // NOTE: this is the only entry point looked up with COI_VERSION2.
+    ProcessLoadLibraryFromMemory =
+        (COIRESULT (*)(COIPROCESS, const void*, uint64_t, const char*,
+                       const char*, const char*, uint64_t, uint32_t,
+                       COILIBRARY*))
+            DL_sym(lib_handle, "COIProcessLoadLibraryFromMemory", COI_VERSION2);
+    if (ProcessLoadLibraryFromMemory == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIProcessLoadLibraryFromMemory");
+        fini();
+        return false;
+    }
+
+    ProcessRegisterLibraries =
+        (COIRESULT (*)(uint32_t, const void**, const uint64_t*, const char**,
+                       const uint64_t*))
+            DL_sym(lib_handle, "COIProcessRegisterLibraries", COI_VERSION1);
+    if (ProcessRegisterLibraries == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIProcessRegisterLibraries");
+        fini();
+        return false;
+    }
+
+    PipelineCreate =
+        (COIRESULT (*)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*))
+            DL_sym(lib_handle, "COIPipelineCreate", COI_VERSION1);
+    if (PipelineCreate == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIPipelineCreate");
+        fini();
+        return false;
+    }
+
+    PipelineDestroy =
+        (COIRESULT (*)(COIPIPELINE))
+            DL_sym(lib_handle, "COIPipelineDestroy", COI_VERSION1);
+    if (PipelineDestroy == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIPipelineDestroy");
+        fini();
+        return false;
+    }
+
+    PipelineRunFunction =
+        (COIRESULT (*)(COIPIPELINE, COIFUNCTION, uint32_t, const COIBUFFER*,
+                       const COI_ACCESS_FLAGS*, uint32_t, const COIEVENT*,
+                       const void*, uint16_t, void*, uint16_t, COIEVENT*))
+            DL_sym(lib_handle, "COIPipelineRunFunction", COI_VERSION1);
+    if (PipelineRunFunction == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIPipelineRunFunction");
+        fini();
+        return false;
+    }
+
+    BufferCreate =
+        (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*,
+                       uint32_t, const COIPROCESS*, COIBUFFER*))
+            DL_sym(lib_handle, "COIBufferCreate", COI_VERSION1);
+    if (BufferCreate == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferCreate");
+        fini();
+        return false;
+    }
+
+    BufferCreateFromMemory =
+        (COIRESULT (*)(uint64_t, COI_BUFFER_TYPE, uint32_t, void*,
+                       uint32_t, const COIPROCESS*, COIBUFFER*))
+            DL_sym(lib_handle, "COIBufferCreateFromMemory", COI_VERSION1);
+    if (BufferCreateFromMemory == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferCreateFromMemory");
+        fini();
+        return false;
+    }
+
+    BufferDestroy =
+        (COIRESULT (*)(COIBUFFER))
+            DL_sym(lib_handle, "COIBufferDestroy", COI_VERSION1);
+    if (BufferDestroy == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferDestroy");
+        fini();
+        return false;
+    }
+
+    BufferMap =
+        (COIRESULT (*)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t,
+                       const COIEVENT*, COIEVENT*, COIMAPINSTANCE*,
+                       void**))
+            DL_sym(lib_handle, "COIBufferMap", COI_VERSION1);
+    if (BufferMap == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferMap");
+        fini();
+        return false;
+    }
+
+    BufferUnmap =
+        (COIRESULT (*)(COIMAPINSTANCE, uint32_t, const COIEVENT*,
+                       COIEVENT*))
+            DL_sym(lib_handle, "COIBufferUnmap", COI_VERSION1);
+    if (BufferUnmap == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferUnmap");
+        fini();
+        return false;
+    }
+
+    BufferWrite =
+        (COIRESULT (*)(COIBUFFER, uint64_t, const void*, uint64_t,
+                       COI_COPY_TYPE, uint32_t, const COIEVENT*,
+                       COIEVENT*))
+            DL_sym(lib_handle, "COIBufferWrite", COI_VERSION1);
+    if (BufferWrite == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferWrite");
+        fini();
+        return false;
+    }
+
+    BufferRead =
+        (COIRESULT (*)(COIBUFFER, uint64_t, void*, uint64_t,
+                                     COI_COPY_TYPE, uint32_t,
+                                     const COIEVENT*, COIEVENT*))
+            DL_sym(lib_handle, "COIBufferRead", COI_VERSION1);
+    if (BufferRead == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferRead");
+        fini();
+        return false;
+    }
+
+    BufferCopy =
+        (COIRESULT (*)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
+                       COI_COPY_TYPE, uint32_t, const COIEVENT*,
+                       COIEVENT*))
+            DL_sym(lib_handle, "COIBufferCopy", COI_VERSION1);
+    if (BufferCopy == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferCopy");
+        fini();
+        return false;
+    }
+
+    BufferGetSinkAddress =
+        (COIRESULT (*)(COIBUFFER, uint64_t*))
+            DL_sym(lib_handle, "COIBufferGetSinkAddress", COI_VERSION1);
+    if (BufferGetSinkAddress == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferGetSinkAddress");
+        fini();
+        return false;
+    }
+
+    BufferSetState =
+        (COIRESULT(*)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+                      COI_BUFFER_MOVE_FLAG, uint32_t, const COIEVENT*,
+                      COIEVENT*))
+            DL_sym(lib_handle, "COIBufferSetState", COI_VERSION1);
+    if (BufferSetState == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIBufferSetState");
+        fini();
+        return false;
+    }
+
+    EventWait =
+        (COIRESULT (*)(uint16_t, const COIEVENT*, int32_t, uint8_t,
+                       uint32_t*, uint32_t*))
+            DL_sym(lib_handle, "COIEventWait", COI_VERSION1);
+    if (EventWait == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIEventWait");
+        fini();
+        return false;
+    }
+
+    PerfGetCycleFrequency =
+        (uint64_t (*)(void))
+            DL_sym(lib_handle, "COIPerfGetCycleFrequency", COI_VERSION1);
+    if (PerfGetCycleFrequency == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
+                            "COIPerfGetCycleFrequency");
+        fini();
+        return false;
+    }
+    // Every lookup succeeded, so publish the interface as usable.
+    is_available = true;
+
+    return true;
+}
+
+// Marks COI unavailable and drops the library handle (closed unless WINNT).
+void fini(void)
+{
+    is_available = false;
+    if (lib_handle == 0)
+        return;
+#ifndef TARGET_WINNT
+    DL_close(lib_handle);
+#endif // TARGET_WINNT
+    lib_handle = 0;
+}
+
+} // namespace COI

Added: openmp/trunk/offload/src/coi/coi_client.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/coi/coi_client.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/coi/coi_client.h (added)
+++ openmp/trunk/offload/src/coi/coi_client.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,118 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The interface between the offload library and the COI API on the host
+
+#ifndef COI_CLIENT_H_INCLUDED
+#define COI_CLIENT_H_INCLUDED
+
+#include <common/COIPerf_common.h>
+#include <source/COIEngine_source.h>
+#include <source/COIProcess_source.h>
+#include <source/COIPipeline_source.h>
+#include <source/COIBuffer_source.h>
+#include <source/COIEvent_source.h>
+
+#include <string.h>
+
+#include "../liboffload_error_codes.h"
+#include "../offload_util.h"
+
+#define MIC_ENGINES_MAX     128
+
+#if MIC_ENGINES_MAX < COI_MAX_ISA_MIC_DEVICES
+#error MIC_ENGINES_MAX need to be increased
+#endif
+
+// COI library interface
+namespace COI {
+
+extern bool init(void);
+extern void fini(void);
+
+extern bool is_available;
+
+// pointers to functions from COI library
+extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+
+extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
+                                           const void*, uint64_t, int,
+                                           const char**, uint8_t,
+                                           const char**, uint8_t,
+                                           const char*, uint64_t,
+                                           const char*,
+                                           const char*, uint64_t,
+                                           COIPROCESS*);
+extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
+                                  int8_t*, uint32_t*);
+extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
+                                             const char**,
+                                             COIFUNCTION*);
+extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
+                                                const void*,
+                                                uint64_t,
+                                                const char*,
+                                                const char*,
+                                                const char*,
+                                                uint64_t,
+                                                uint32_t,
+                                                COILIBRARY*);
+extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
+                                            const void**,
+                                            const uint64_t*,
+                                            const char**,
+                                            const uint64_t*);
+
+extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
+                                  COIPIPELINE*);
+extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
+extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
+                                       uint32_t, const COIBUFFER*,
+                                       const COI_ACCESS_FLAGS*,
+                                       uint32_t, const COIEVENT*,
+                                       const void*, uint16_t, void*,
+                                       uint16_t, COIEVENT*);
+
+extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+                                const void*, uint32_t,
+                                const COIPROCESS*, COIBUFFER*);
+extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
+                                          uint32_t, void*,
+                                          uint32_t, const COIPROCESS*,
+                                          COIBUFFER*);
+extern COIRESULT (*BufferDestroy)(COIBUFFER);
+extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
+                             COI_MAP_TYPE, uint32_t, const COIEVENT*,
+                             COIEVENT*, COIMAPINSTANCE*, void**);
+extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
+                               const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
+                               uint64_t, COI_COPY_TYPE, uint32_t,
+                               const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
+                              COI_COPY_TYPE, uint32_t,
+                              const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
+                              uint64_t, COI_COPY_TYPE, uint32_t,
+                              const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+                                   COI_BUFFER_MOVE_FLAG, uint32_t,
+                                   const   COIEVENT*, COIEVENT*);
+
+extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
+                           uint8_t, uint32_t*, uint32_t*);
+
+extern uint64_t  (*PerfGetCycleFrequency)(void);
+
+} // namespace COI
+
+#endif // COI_CLIENT_H_INCLUDED

Added: openmp/trunk/offload/src/coi/coi_server.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/coi/coi_server.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/coi/coi_server.cpp (added)
+++ openmp/trunk/offload/src/coi/coi_server.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,130 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The COI interface on the target
+
+#include "coi_server.h"
+
+#include "../offload_target.h"
+#include "../offload_timer.h"
+#ifdef MYO_SUPPORT
+#include "../offload_myo_target.h"      // for __offload_myoLibInit/Fini
+#endif // MYO_SUPPORT
+
+// Target-side entry point exported via COINATIVELIBEXPORT: forwards the
+// buffers and the misc/return data blocks to OffloadDescriptor::offload.
+// buffers_len is accepted but not used.
+COINATIVELIBEXPORT
+void server_compute(uint32_t buffer_count, void** buffers,
+                    uint64_t* buffers_len,
+                    void* misc_data, uint16_t misc_data_len,
+                    void* return_data, uint16_t return_data_len)
+{
+    OffloadDescriptor::offload(
+        buffer_count, buffers,
+        misc_data, misc_data_len,
+        return_data, return_data_len
+    );
+}
+
+// Target-side initialization entry point. misc_data carries four ints
+// (device index, device count, console level, report level) that are copied
+// into the matching globals; return_data receives this process's pid.
+COINATIVELIBEXPORT
+void server_init(uint32_t buffer_count, void** buffers,
+                 uint64_t* buffers_len,
+                 void* misc_data, uint16_t misc_data_len,
+                 void* return_data, uint16_t return_data_len)
+{
+    struct init_data {
+        int  device_index;
+        int  devices_total;
+        int  console_level;
+        int  offload_report_level;
+    };
+    const init_data *args = static_cast<const init_data*>(misc_data);
+    pid_t *pid_out = static_cast<pid_t*>(return_data);
+
+    // device identity: own index and total number of devices
+    mic_index = args->device_index;
+    mic_engines_total = args->devices_total;
+
+    // trace and report levels
+    console_enabled = args->console_level;
+    offload_report_level = args->offload_report_level;
+
+    // hand the process id back to the caller
+    *pid_out = getpid();
+}
+
+// Target-side entry point that reports the variable table size. return_data
+// is viewed as two int64_t fields: table_size() is handed "nelems" and its
+// result is stored in "length". All other arguments are unused.
+COINATIVELIBEXPORT
+void server_var_table_size(uint32_t buffer_count, void** buffers,
+                           uint64_t* buffers_len,
+                           void* misc_data, uint16_t misc_data_len,
+                           void* return_data, uint16_t return_data_len)
+{
+    // layout of the block behind return_data
+    struct Params {
+        int64_t nelems;
+        int64_t length;
+    };
+
+    Params *const out = static_cast<Params*>(return_data);
+
+    out->length = __offload_vars.table_size(out->nelems);
+}
+
+// Target-side entry point that calls __offload_vars.table_copy with
+// buffers[0] and the int64_t stored in misc_data. The remaining arguments
+// are unused.
+COINATIVELIBEXPORT
+void server_var_table_copy(uint32_t buffer_count, void** buffers,
+                           uint64_t* buffers_len,
+                           void* misc_data, uint16_t misc_data_len,
+                           void* return_data, uint16_t return_data_len)
+{
+    int64_t *arg = static_cast<int64_t*>(misc_data);
+
+    __offload_vars.table_copy(buffers[0], *arg);
+}
+
+#ifdef MYO_SUPPORT
+// temporary workaround for blocking behavior of myoiLibInit/Fini calls
+// Target-side entry point (MYO_SUPPORT builds only) whose sole action is a
+// call to __offload_myoLibInit(). The parameter list matches the other
+// server_* entry points in this file.
+COINATIVELIBEXPORT
+void server_myoinit(
+    uint32_t buffer_count, void** buffers, uint64_t* buffers_len,
+    void* misc_data, uint16_t misc_data_len,
+    void* return_data, uint16_t return_data_len
+)
+{
+    // none of the arguments are consulted
+    __offload_myoLibInit();
+}
+
+// Target-side entry point (MYO_SUPPORT builds only) whose sole action is a
+// call to __offload_myoLibFini(). The parameter list matches the other
+// server_* entry points in this file.
+COINATIVELIBEXPORT
+void server_myofini(
+    uint32_t buffer_count, void** buffers, uint64_t* buffers_len,
+    void* misc_data, uint16_t misc_data_len,
+    void* return_data, uint16_t return_data_len
+)
+{
+    // none of the arguments are consulted
+    __offload_myoLibFini();
+}
+#endif // MYO_SUPPORT

Added: openmp/trunk/offload/src/coi/coi_server.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/coi/coi_server.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/coi/coi_server.h (added)
+++ openmp/trunk/offload/src/coi/coi_server.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The interface between the offload library and the COI API on the target.
+
+#ifndef COI_SERVER_H_INCLUDED
+#define COI_SERVER_H_INCLUDED
+
+#include <common/COIEngine_common.h>
+#include <common/COIPerf_common.h>
+#include <sink/COIProcess_sink.h>
+#include <sink/COIPipeline_sink.h>
+#include <sink/COIBuffer_sink.h>
+#include <list>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "../liboffload_error_codes.h"
+
+// wrappers for COI API
+#define PipelineStartExecutingRunFunctions() \
+    { \
+        COIRESULT res = COIPipelineStartExecutingRunFunctions(); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_pipeline_start_run_funcs, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+#define ProcessWaitForShutdown() \
+    { \
+        COIRESULT res = COIProcessWaitForShutdown(); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_process_wait_shutdown, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+#define BufferAddRef(buf) \
+    { \
+        COIRESULT res = COIBufferAddRef(buf); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_buf_add_ref, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+#define BufferReleaseRef(buf) \
+    { \
+        COIRESULT res = COIBufferReleaseRef(buf); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_buf_release_ref, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+#define EngineGetIndex(index) \
+    { \
+        COI_ISA_TYPE isa_type; \
+        COIRESULT res = COIEngineGetIndex(&isa_type, index); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_get_engine_index, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+#endif // COI_SERVER_H_INCLUDED

Added: openmp/trunk/offload/src/compiler_if_host.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/compiler_if_host.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/compiler_if_host.cpp (added)
+++ openmp/trunk/offload/src/compiler_if_host.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,323 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "compiler_if_host.h"
+
+#include <malloc.h>
+#ifndef TARGET_WINNT
+#include <alloca.h>
+#endif // TARGET_WINNT
+
+// Global counter on host.
+// This variable is used if P2OPT_offload_do_data_persistence == 2.
+// It is used to identify offload constructs contained in one procedure.
+// Increment of OFFLOAD_CALL_COUNT is inserted at entries of HOST routines with
+// offload constructs.
+static int offload_call_count = 0;
+
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
+    TARGET_TYPE      target_type,
+    int              target_number,
+    int              is_optional,
+    _Offload_status* status,
+    const char*      file,
+    uint64_t         line
+)
+{
+    bool retval;
+    OFFLOAD ofld;
+
+    // pessimistic defaults for the caller-visible status (if one was passed)
+    if (status != 0) {
+        status->result = OFFLOAD_UNAVAILABLE;
+        status->device_number = -1;
+        status->data_sent = 0;
+        status->data_received = 0;
+    }
+
+    // make sure the library is initialized
+    retval = __offload_init_library();
+
+    // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+    // initialize all devices if init_type is on_offload_all
+    if (retval && __offload_init_type == c_init_on_offload_all) {
+        for (int i = 0; i < mic_engines_total; i++) {
+             mic_engines[i].init();
+        }
+    }
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+    if (target_type == TARGET_HOST) {
+        // Host always available
+        retval = true;
+    }
+    else if (target_type == TARGET_MIC) {
+        if (target_number >= -1) {
+            if (retval) {
+                if (target_number >= 0) {
+                    // User provided the device number
+                    target_number = target_number % mic_engines_total;
+                }
+                else {
+                    // use device 0
+                    target_number = 0;
+                }
+
+                // reserve device in ORSL
+                if (is_optional) {
+                    if (!ORSL::try_reserve(target_number)) {
+                        target_number = -1;
+                    }
+                }
+                else {
+                    if (!ORSL::reserve(target_number)) {
+                        target_number = -1;
+                    }
+                }
+
+                // initialize the reserved device when init type is on_offload
+                if (target_number >= 0 &&
+                    __offload_init_type == c_init_on_offload) {
+                    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+                    mic_engines[target_number].init();
+                    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+                }
+            }
+            else {
+                // fallback to CPU
+                target_number = -1;
+            }
+            // no device: mandatory offload without status is a fatal error
+            if (target_number < 0 || !retval) {
+                if (!is_optional && status == 0) {
+                    LIBOFFLOAD_ERROR(c_device_is_not_available);
+                    exit(1);
+                }
+
+                retval = false;
+            }
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_device_number);
+            exit(1);
+        }
+    }
+    // on success create the descriptor, otherwise stop timers and return NULL
+    if (retval) {
+        ofld = new OffloadDescriptor(target_number, status,
+                                     !is_optional, false, timer_data);
+        OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+        Offload_Report_Prolog(timer_data);
+        OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+                              "Starting offload: target_type = %d, "
+                              "number = %d, is_optional = %d\n",
+                              target_type, target_number, is_optional);
+
+        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+    }
+    else {
+        ofld = NULL;
+
+        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
+        offload_report_free_data(timer_data);
+    }
+
+    return ofld;
+}
+
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
+    const int*  device_num,
+    const char* file,
+    uint64_t    line
+)
+{
+    int target_number;
+
+    // make sure the library is initialized and at least one device is available
+    if (!__offload_init_library()) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+
+    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+    if (__offload_init_type == c_init_on_offload_all) {
+        for (int i = 0; i < mic_engines_total; i++) {
+             mic_engines[i].init();
+        }
+    }
+
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+    // use default device number if it is not provided
+    if (device_num != 0) {
+        target_number = *device_num;
+    }
+    else {
+        target_number = __omp_device_num;
+    }
+
+    // device number should be a non-negative integer value
+    if (target_number < 0) {
+        LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
+        exit(1);
+    }
+
+    // should we do this for OpenMP?
+    target_number %= mic_engines_total;
+
+    // reserve device in ORSL
+    if (!ORSL::reserve(target_number)) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    // initialize device(s)
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+    if (__offload_init_type == c_init_on_offload) {
+        mic_engines[target_number].init();
+    }
+
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+    // no status block (0), mandatory (true); last flag presumably marks OpenMP
+    OFFLOAD ofld =
+        new OffloadDescriptor(target_number, 0, true, true, timer_data);
+
+    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+
+    Offload_Report_Prolog(timer_data);
+
+    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+                          "Starting OpenMP offload, device = %d\n",
+                          target_number);
+
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+
+    return ofld;
+}
+
+// Run the offload described by ofld and release the descriptor when this
+// function is the last user of it.
+// The descriptor is deleted here when the offload failed or when no signal
+// was supplied; otherwise it is left alive.
+// Returns the result of ofld->offload() converted to int.
+int offload_offload_wrap(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const bool succeeded = ofld->offload(name, is_empty, vars, vars2,
+                                         num_vars, waits, num_waits,
+                                         signal, entry_id, stack_addr);
+
+    // keep the descriptor only for a successful offload that carries a signal
+    const bool keep_descriptor = succeeded && signal != 0;
+    if (!keep_descriptor) {
+        delete ofld;
+    }
+
+    return succeeded;
+}
+
+// Run function on target using the interface for old data persistence.
+// Forwards all arguments to offload_offload_wrap() with no entry id (0) and
+// no stack address (NULL) and returns its result.
+extern "C" int OFFLOAD_OFFLOAD1(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void **signal
+)
+{
+    // entry_id is an int parameter, so pass the integer 0 rather than the
+    // null pointer constant NULL; stack_addr is a pointer and stays NULL.
+    return offload_offload_wrap(ofld, name, is_empty,
+                            num_vars, vars, vars2,
+                            num_waits, waits,
+                            signal, 0, NULL);
+}
+
+// Run function on target using the interface for new data persistence.
+// Pure forwarding entry point: every argument, including entry_id and
+// stack_addr, is handed unchanged to offload_offload_wrap().
+extern "C" int OFFLOAD_OFFLOAD2(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void** signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const int result = offload_offload_wrap(
+        ofld, name, is_empty,
+        num_vars, vars, vars2,
+        num_waits, waits, signal,
+        entry_id, stack_addr);
+    return result;
+}
+
+// Obsolete entry point: adapts the old calling convention (signal passed by
+// value, wait list possibly containing NULL entries) to OFFLOAD_OFFLOAD1.
+// entry_id and stack_addr are accepted but not forwarded.
+extern "C" int OFFLOAD_OFFLOAD(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void *signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    // signal is passed by reference now: a non-NULL signal is forwarded as
+    // the address of the local parameter, a NULL one as a null pointer
+    const void **signal_ref = 0;
+    if (signal != 0) {
+        signal_ref = &signal;
+    }
+
+    const void **live_waits = 0;
+    int live_count = 0;
+
+    // remove NULL values from the list of signals to wait for
+    if (num_waits > 0) {
+        // stack allocation; stays valid until this function returns
+        live_waits = (const void**) alloca(sizeof(void*) * num_waits);
+        const void **const waits_end = waits + num_waits;
+        for (const void **cur = waits; cur != waits_end; ++cur) {
+            if (*cur != 0) {
+                live_waits[live_count++] = *cur;
+            }
+        }
+    }
+
+    return OFFLOAD_OFFLOAD1(ofld, name, is_empty,
+                            num_vars, vars, vars2,
+                            live_count, live_waits,
+                            signal_ref);
+}
+
+// Advance the global offload call counter and return its new value.
+extern "C" int OFFLOAD_CALL_COUNT()
+{
+    return ++offload_call_count;
+}

Added: openmp/trunk/offload/src/compiler_if_host.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/compiler_if_host.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/compiler_if_host.h (added)
+++ openmp/trunk/offload/src/compiler_if_host.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,133 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The interface between compiler-generated host code and runtime library
+*/
+
+#ifndef COMPILER_IF_HOST_H_INCLUDED
+#define COMPILER_IF_HOST_H_INCLUDED
+
+#include "offload_host.h"
+
+#define OFFLOAD_TARGET_ACQUIRE          OFFLOAD_PREFIX(target_acquire)
+#define OFFLOAD_TARGET_ACQUIRE1         OFFLOAD_PREFIX(target_acquire1)
+#define OFFLOAD_OFFLOAD                 OFFLOAD_PREFIX(offload)
+#define OFFLOAD_OFFLOAD1                OFFLOAD_PREFIX(offload1)
+#define OFFLOAD_OFFLOAD2                OFFLOAD_PREFIX(offload2)
+#define OFFLOAD_CALL_COUNT              OFFLOAD_PREFIX(offload_call_count)
+
+
+/*! \fn OFFLOAD_TARGET_ACQUIRE
+    \brief Attempt to acquire the target.
+    \param target_type   The type of target.
+    \param target_number The device number.
+    \param is_optional   Whether CPU fall-back is allowed.
+    \param status        Address of variable to hold offload status.
+    \param file          Filename in which this offload occurred.
+    \param line          Line number in the file where this offload occurred.
+*/
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
+    TARGET_TYPE      target_type,
+    int              target_number,
+    int              is_optional,
+    _Offload_status* status,
+    const char*      file,
+    uint64_t         line
+);
+
+/*! \fn OFFLOAD_TARGET_ACQUIRE1
+    \brief Acquire the target for offload (OpenMP).
+    \param device_number Device number or null if not specified.
+    \param file          Filename in which this offload occurred.
+    \param line          Line number in the file where this offload occurred.
+*/
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
+    const int*      device_number,
+    const char*     file,
+    uint64_t        line
+);
+
+/*! \fn OFFLOAD_OFFLOAD1
+    \brief Run function on target using interface for old data persistence.
+    \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+    \param name Name of offload entry point.
+    \param is_empty If no code to execute (e.g. offload_transfer)
+    \param num_vars Number of variable descriptors.
+    \param vars Pointer to VarDesc array.
+    \param vars2 Pointer to VarDesc2 array.
+    \param num_waits Number of "wait" values.
+    \param waits Pointer to array of wait values.
+    \param signal Pointer to signal value or NULL.
+*/
+extern "C" int OFFLOAD_OFFLOAD1(
+    OFFLOAD o,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void** signal
+);
+
+/*! \fn OFFLOAD_OFFLOAD2
+    \brief Run function on target using interface for new data persistence.
+    \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+    \param name Name of offload entry point.
+    \param is_empty If no code to execute (e.g. offload_transfer)
+    \param num_vars Number of variable descriptors.
+    \param vars Pointer to VarDesc array.
+    \param vars2 Pointer to VarDesc2 array.
+    \param num_waits Number of "wait" values.
+    \param waits Pointer to array of wait values.
+    \param signal Pointer to signal value or NULL.
+    \param entry_id A signature for the function doing the offload.
+    \param stack_addr The stack frame address of the function doing offload.
+*/
+extern "C" int OFFLOAD_OFFLOAD2(
+    OFFLOAD o,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void** signal,
+    int entry_id,
+    const void *stack_addr
+);
+
+/*! \fn OFFLOAD_OFFLOAD
+    \brief Run function on target (obsolete).
+    \param o OFFLOAD object.
+    \param name Function name.
+    \param is_empty If no code to execute (e.g. offload_transfer)
+    \param num_vars Number of variable descriptors.
+    \param vars Pointer to VarDesc array.
+    \param vars2 Pointer to VarDesc2 array.
+    \param num_waits Number of "wait" values.
+    \param waits Pointer to array of wait values; NULL entries are skipped
+                 by the implementation in compiler_if_host.cpp.
+    \param signal Signal value (passed by value, unlike OFFLOAD_OFFLOAD1/2)
+                  or NULL.
+    \param entry_id A signature for the function doing the offload
+                    (defaults to 0).
+    \param stack_addr The stack frame address of the function doing offload
+                      (defaults to NULL).
+*/
+extern "C" int OFFLOAD_OFFLOAD(
+    OFFLOAD o,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void* signal,
+    int entry_id = 0,
+    const void *stack_addr = NULL
+);
+
+// Global counter on host.
+// This variable is used if P2OPT_offload_do_data_persistence == 2.
+// The variable used to identify offload constructs contained in one procedure.
+// Call to OFFLOAD_CALL_COUNT() is inserted at HOST on entry of the routine.
+extern "C" int  OFFLOAD_CALL_COUNT();
+
+#endif // COMPILER_IF_HOST_H_INCLUDED

Added: openmp/trunk/offload/src/compiler_if_target.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/compiler_if_target.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/compiler_if_target.cpp (added)
+++ openmp/trunk/offload/src/compiler_if_target.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "compiler_if_target.h"
+
+// Target-side entry hook: traces the call, merges the compiler-provided
+// variable descriptors into the offload descriptor and then scatters the
+// copy-in data.
+extern "C" void OFFLOAD_TARGET_ENTER(
+    OFFLOAD ofld,
+    int vars_total,
+    VarDesc *vars,
+    VarDesc2 *vars2
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p, %d, %p, %p)\n", __func__, ofld,
+                        vars_total, vars, vars2);
+    // descriptors must be merged before the copy-in data is scattered
+    ofld->merge_var_descs(vars, vars2, vars_total);
+    ofld->scatter_copyin_data();
+}
+
+// Target-side exit hook: traces the call and gathers the copy-out data of
+// the given offload descriptor.
+extern "C" void OFFLOAD_TARGET_LEAVE(
+    OFFLOAD ofld
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ofld);
+    ofld->gather_copyout_data();
+}
+
+// Entry point for the target application: initializes the target part of
+// the runtime, hands control to COI and returns once
+// ProcessWaitForShutdown() returns.
+extern "C" void OFFLOAD_TARGET_MAIN(void)
+{
+    // initialize target part
+    __offload_target_init();
+
+    // pass control to COI
+    PipelineStartExecutingRunFunctions();
+    ProcessWaitForShutdown();
+
+    OFFLOAD_DEBUG_TRACE(2, "Exiting main...\n");
+}

Added: openmp/trunk/offload/src/compiler_if_target.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/compiler_if_target.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/compiler_if_target.h (added)
+++ openmp/trunk/offload/src/compiler_if_target.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The interface between compiler-generated target code and runtime library
+*/
+
+#ifndef COMPILER_IF_TARGET_H_INCLUDED
+#define COMPILER_IF_TARGET_H_INCLUDED
+
+#include "offload_target.h"
+
+#define OFFLOAD_TARGET_ENTER            OFFLOAD_PREFIX(target_enter)
+#define OFFLOAD_TARGET_LEAVE            OFFLOAD_PREFIX(target_leave)
+#define OFFLOAD_TARGET_MAIN             OFFLOAD_PREFIX(target_main)
+
+/*! \fn OFFLOAD_TARGET_ENTER
+    \brief Fill in variable addresses using VarDesc array.
+    \brief Then call back the runtime library to fetch data.
+    \param ofld         Offload descriptor created by runtime.
+    \param var_desc_num Number of variable descriptors.
+    \param var_desc     Pointer to VarDesc array.
+    \param var_desc2    Pointer to VarDesc2 array.
+*/
+extern "C" void OFFLOAD_TARGET_ENTER(
+    OFFLOAD ofld,
+    int var_desc_num,
+    VarDesc *var_desc,
+    VarDesc2 *var_desc2
+);
+
+/*! \fn OFFLOAD_TARGET_LEAVE
+    \brief Call back the runtime library to gather outputs using VarDesc array.
+    \param ofld Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+*/
+extern "C" void OFFLOAD_TARGET_LEAVE(
+    OFFLOAD ofld
+);
+
+// Entry point for the target application.
+extern "C" void OFFLOAD_TARGET_MAIN(void);
+
+#endif // COMPILER_IF_TARGET_H_INCLUDED

Added: openmp/trunk/offload/src/dv_util.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/dv_util.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/dv_util.cpp (added)
+++ openmp/trunk/offload/src/dv_util.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,131 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_common.h"
+
+// Report whether the array described by the dope vector is contiguous.
+// True when the contiguous flag is set, when the rank is zero, or when the
+// first multiplier equals Len and every further multiplier equals the
+// previous dimension's Extent * Mult.
+bool __dv_is_contiguous(const ArrDesc *dvp)
+{
+    // descriptor explicitly marked as contiguous
+    if ((dvp->Flags & ArrDescFlagsContiguous) != 0) {
+        return true;
+    }
+
+    // nothing to check when there are no dimensions
+    if (dvp->Rank == 0) {
+        return true;
+    }
+
+    const DimDesc *dims = dvp->Dim;
+
+    // innermost stride must match the element length
+    if (dims[0].Mult != dvp->Len) {
+        return false;
+    }
+
+    // every outer stride must equal the full span of the dimension below it
+    for (int cur = 1; cur < dvp->Rank; cur++) {
+        const DimDesc &prev = dims[cur - 1];
+        if (dims[cur].Mult != prev.Extent * prev.Mult) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Report whether the ArrDescFlagsDefined bit is set in the dope vector.
+bool __dv_is_allocated(const ArrDesc *dvp)
+{
+    const bool defined = (dvp->Flags & ArrDescFlagsDefined) != 0;
+    return defined;
+}
+
+// Compute the data length described by the dope vector: Len plus, for each
+// dimension, (Extent - 1) * Mult. With rank zero the result is just Len.
+uint64_t __dv_data_length(const ArrDesc *dvp)
+{
+    uint64_t total = dvp->Len;
+
+    // the loop body never runs for a rank-zero descriptor
+    for (int d = 0; d < dvp->Rank; ++d) {
+        const DimDesc &dim = dvp->Dim[d];
+        total += (dim.Extent-1) * dim.Mult;
+    }
+
+    return total;
+}
+
+// Compute the length of count elements: count itself for a rank-zero
+// descriptor, otherwise count multiplied by the first dimension's Mult.
+uint64_t __dv_data_length(const ArrDesc *dvp, int64_t count)
+{
+    return (dvp->Rank == 0) ? count : count * dvp->Dim[0].Mult;
+}
+
+// Create CeanReadRanges data for reading contiguous ranges of
+// noncontiguous array defined by the argument
+// Returns NULL when the descriptor has rank zero; otherwise returns a
+// malloc'ed CeanReadRanges followed by one CeanReadDim per dimension from
+// the first non-matching dimension upwards.
+// NOTE(review): ownership of the returned block is not visible here -
+// presumably the caller frees it; confirm.
+CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp)
+{
+    int64_t         len;
+    int             count;
+    int             rank = dvp->Rank;
+    CeanReadRanges *res = NULL;
+
+    if (rank != 0) {
+        int i = 0;
+        len = dvp->Len;
+        // Find the first dimension i whose multiplier differs from the
+        // accumulated length; len ends up as the product of Len and the
+        // extents of the dimensions below i.
+        if (dvp->Dim[0].Mult == len) {
+            for (i = 1; i < rank; i++) {
+                len *= dvp->Dim[i-1].Extent;
+                if (dvp->Dim[i].Mult != len) {
+                    break;
+                }
+            }
+        }
+        // one CeanReadDim entry for each of the remaining (rank - i) dims
+        // NOTE(review): the malloc result is dereferenced below without a
+        // NULL check.
+        res = (CeanReadRanges *)malloc(
+            sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim));
+        res -> last_noncont_ind = rank - i - 1;
+        count = 1;
+        // Remaining dimensions are stored in reverse order: dimension i
+        // goes to the last slot. count holds the product of the extents
+        // of the dimensions already processed.
+        for (; i < rank; i++) {
+            res->Dim[rank - i - 1].count = count;
+            res->Dim[rank - i - 1].size = dvp->Dim[i].Mult;
+            count *= dvp->Dim[i].Extent;
+        }
+        res -> range_max_number = count;
+        res -> range_size = len;
+        res -> ptr = (void*)dvp->Base;
+        res -> current_number = 0;
+        res -> init_offset = 0;
+    }
+    return res;
+}
+
+#if OFFLOAD_DEBUG > 0
+// Debug helper: trace (level 3) the address of the dope vector and, when it
+// is non-null, its header fields followed by one line per dimension.
+// NOTE(review): the fields are int64_t (dv_size) but are printed with
+// %lx / %ld, which only matches where long is 64 bits wide - confirm the
+// supported targets.
+void __dv_desc_dump(const char *name, const ArrDesc *dvp)
+{
+    OFFLOAD_TRACE(3, "%s DV %p\n", name, dvp);
+
+    if (dvp != 0) {
+        OFFLOAD_TRACE(3,
+                      "    dv->Base   = 0x%lx\n"
+                      "    dv->Len    = 0x%lx\n"
+                      "    dv->Offset = 0x%lx\n"
+                      "    dv->Flags  = 0x%lx\n"
+                      "    dv->Rank   = 0x%lx\n"
+                      "    dv->Resrvd = 0x%lx\n",
+                      dvp->Base,
+                      dvp->Len,
+                      dvp->Offset,
+                      dvp->Flags,
+                      dvp->Rank,
+                      dvp->Reserved);
+
+        // one trace line per dimension
+        for (int i = 0 ; i < dvp->Rank; i++) {
+            OFFLOAD_TRACE(3,
+                          "    (%d) Extent=%ld, Multiplier=%ld, LowerBound=%ld\n",
+                          i,
+                          dvp->Dim[i].Extent,
+                          dvp->Dim[i].Mult,
+                          dvp->Dim[i].LowerBound);
+        }
+    }
+}
+#endif // OFFLOAD_DEBUG > 0

Added: openmp/trunk/offload/src/dv_util.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/dv_util.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/dv_util.h (added)
+++ openmp/trunk/offload/src/dv_util.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef DV_UTIL_H_INCLUDED
+#define DV_UTIL_H_INCLUDED
+
+#include <stdint.h>
+
+// Dope vector declarations
+#define ArrDescMaxArrayRank         31
+
+// Dope vector flags
+#define ArrDescFlagsDefined         1
+#define ArrDescFlagsNodealloc       2
+#define ArrDescFlagsContiguous      4
+
+typedef int64_t dv_size;
+
+// Description of a single dimension of a dope vector (see ArrDesc).
+typedef struct DimDesc {
+    dv_size        Extent;      // Number of elements in this dimension
+    dv_size        Mult;        // Multiplier for this dimension.
+                                // The number of bytes between successive
+                                // elements in this dimension.
+    dv_size        LowerBound;  // LowerBound of this dimension
+} DimDesc ;
+
+// Dope vector: array descriptor holding the base address, flags, rank and
+// per-dimension layout of an array.
+typedef struct ArrDesc {
+    dv_size        Base;        // Base address
+    dv_size        Len;         // Length of data type, used only for
+                                // character strings.
+                                // NOTE(review): the helpers in dv_util.cpp
+                                // read Len for every descriptor (compared
+                                // with Dim[0].Mult, used as initial size) -
+                                // confirm the "character strings" remark.
+    dv_size        Offset;
+    dv_size        Flags;       // Flags (ArrDescFlags* bits)
+    dv_size        Rank;        // Rank of pointer
+    dv_size        Reserved;    // reserved for openmp requests
+    DimDesc Dim[ArrDescMaxArrayRank];   // Per-dimension descriptors
+} ArrDesc ;
+
+typedef ArrDesc* pArrDesc;
+
+bool __dv_is_contiguous(const ArrDesc *dvp);
+
+bool __dv_is_allocated(const ArrDesc *dvp);
+
+uint64_t __dv_data_length(const ArrDesc *dvp);
+
+uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
+
+CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
+
+#if OFFLOAD_DEBUG > 0
+void    __dv_desc_dump(const char *name, const ArrDesc *dvp);
+#else // OFFLOAD_DEBUG
+#define __dv_desc_dump(name, dvp)
+#endif // OFFLOAD_DEBUG
+
+#endif // DV_UTIL_H_INCLUDED

Added: openmp/trunk/offload/src/liboffload_error.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/liboffload_error.c?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/liboffload_error.c (added)
+++ openmp/trunk/offload/src/liboffload_error.c Wed Apr  9 10:40:23 2014
@@ -0,0 +1,452 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#ifndef va_copy
+#define va_copy(dst, src) ((dst) = (src))
+#endif
+
+#include "liboffload_msg.h"
+
+#include "liboffload_error_codes.h"
+
+/***********************************************/
+/* error-handling function, liboffload_error_support */
+/***********************************************/
+
+// Print the diagnostic that corresponds to input_tag on stderr.
+// Each known error tag is mapped to its msg_* message id; the variadic
+// arguments are handed to write_message() as a va_list.
+// Tags without a case below print nothing.
+void __liboffload_error_support(error_types input_tag, ...)
+{
+    va_list args;
+    va_start(args, input_tag);
+
+    switch (input_tag) {
+        case c_device_is_not_available:
+            write_message(stderr, msg_c_device_is_not_available, args);
+            break;
+        case c_invalid_device_number:
+            write_message(stderr, msg_c_invalid_device_number, args);
+            break;
+        case c_send_func_ptr:
+            write_message(stderr, msg_c_send_func_ptr, args);
+            break;
+        case c_receive_func_ptr:
+            write_message(stderr, msg_c_receive_func_ptr, args);
+            break;
+        case c_offload_malloc:
+            write_message(stderr, msg_c_offload_malloc, args);
+            break;
+        case c_offload1:
+            write_message(stderr, msg_c_offload1, args);
+            break;
+        case c_unknown_var_type:
+            // must be the msg_* message id, not the error tag itself
+            write_message(stderr, msg_c_unknown_var_type, args);
+            break;
+        case c_invalid_env_var_value:
+            write_message(stderr, msg_c_invalid_env_var_value, args);
+            break;
+        case c_invalid_env_var_int_value:
+            write_message(stderr, msg_c_invalid_env_var_int_value, args);
+            break;
+        case c_invalid_env_report_value:
+            write_message(stderr, msg_c_invalid_env_report_value, args);
+            break;
+        case c_offload_signaled1:
+            write_message(stderr, msg_c_offload_signaled1, args);
+            break;
+        case c_offload_signaled2:
+            write_message(stderr, msg_c_offload_signaled2, args);
+            break;
+        case c_myowrapper_checkresult:
+            write_message(stderr, msg_c_myowrapper_checkresult, args);
+            break;
+        case c_myotarget_checkresult:
+            write_message(stderr, msg_c_myotarget_checkresult, args);
+            break;
+        case c_offload_descriptor_offload:
+            write_message(stderr, msg_c_offload_descriptor_offload, args);
+            break;
+        case c_merge_var_descs1:
+            write_message(stderr, msg_c_merge_var_descs1, args);
+            break;
+        case c_merge_var_descs2:
+            write_message(stderr, msg_c_merge_var_descs2, args);
+            break;
+        case c_mic_parse_env_var_list1:
+            write_message(stderr, msg_c_mic_parse_env_var_list1, args);
+            break;
+        case c_mic_parse_env_var_list2:
+            write_message(stderr, msg_c_mic_parse_env_var_list2, args);
+            break;
+        case c_mic_process_exit_ret:
+            write_message(stderr, msg_c_mic_process_exit_ret, args);
+            break;
+        case c_mic_process_exit_sig:
+            write_message(stderr, msg_c_mic_process_exit_sig, args);
+            break;
+        case c_mic_process_exit:
+            write_message(stderr, msg_c_mic_process_exit, args);
+            break;
+        case c_mic_init3:
+            write_message(stderr, msg_c_mic_init3, args);
+            break;
+        case c_mic_init4:
+            write_message(stderr, msg_c_mic_init4, args);
+            break;
+        case c_mic_init5:
+            write_message(stderr, msg_c_mic_init5, args);
+            break;
+        case c_mic_init6:
+            write_message(stderr, msg_c_mic_init6, args);
+            break;
+        case c_no_static_var_data:
+            write_message(stderr, msg_c_no_static_var_data, args);
+            break;
+        case c_no_ptr_data:
+            write_message(stderr, msg_c_no_ptr_data, args);
+            break;
+        case c_get_engine_handle:
+            write_message(stderr, msg_c_get_engine_handle, args);
+            break;
+        case c_get_engine_index:
+            write_message(stderr, msg_c_get_engine_index, args);
+            break;
+        case c_process_create:
+            write_message(stderr, msg_c_process_create, args);
+            break;
+        case c_process_wait_shutdown:
+            write_message(stderr, msg_c_process_wait_shutdown, args);
+            break;
+        case c_process_proxy_flush:
+            write_message(stderr, msg_c_process_proxy_flush, args);
+            break;
+        case c_process_get_func_handles:
+            write_message(stderr, msg_c_process_get_func_handles, args);
+            break;
+        case c_load_library:
+            write_message(stderr, msg_c_load_library, args);
+            break;
+        case c_coipipe_max_number:
+            write_message(stderr, msg_c_coi_pipeline_max_number, args);
+            break;
+        case c_pipeline_create:
+            write_message(stderr, msg_c_pipeline_create, args);
+            break;
+        case c_pipeline_run_func:
+            write_message(stderr, msg_c_pipeline_run_func, args);
+            break;
+        case c_pipeline_start_run_funcs:
+            write_message(stderr, msg_c_pipeline_start_run_funcs, args);
+            break;
+        case c_buf_create:
+            write_message(stderr, msg_c_buf_create, args);
+            break;
+        case c_buf_create_out_of_mem:
+            write_message(stderr, msg_c_buf_create_out_of_mem, args);
+            break;
+        case c_buf_create_from_mem:
+            write_message(stderr, msg_c_buf_create_from_mem, args);
+            break;
+        case c_buf_destroy:
+            write_message(stderr, msg_c_buf_destroy, args);
+            break;
+        case c_buf_map:
+            write_message(stderr, msg_c_buf_map, args);
+            break;
+        case c_buf_unmap:
+            write_message(stderr, msg_c_buf_unmap, args);
+            break;
+        case c_buf_read:
+            write_message(stderr, msg_c_buf_read, args);
+            break;
+        case c_buf_write:
+            write_message(stderr, msg_c_buf_write, args);
+            break;
+        case c_buf_copy:
+            write_message(stderr, msg_c_buf_copy, args);
+            break;
+        case c_buf_get_address:
+            write_message(stderr, msg_c_buf_get_address, args);
+            break;
+        case c_buf_add_ref:
+            write_message(stderr, msg_c_buf_add_ref, args);
+            break;
+        case c_buf_release_ref:
+            write_message(stderr, msg_c_buf_release_ref, args);
+            break;
+        case c_buf_set_state:
+            write_message(stderr, msg_c_buf_set_state, args);
+            break;
+        case c_event_wait:
+            write_message(stderr, msg_c_event_wait, args);
+            break;
+        case c_zero_or_neg_ptr_len:
+            write_message(stderr, msg_c_zero_or_neg_ptr_len, args);
+            break;
+        case c_zero_or_neg_transfer_size:
+            write_message(stderr, msg_c_zero_or_neg_transfer_size, args);
+            break;
+        case c_bad_ptr_mem_range:
+            write_message(stderr, msg_c_bad_ptr_mem_range, args);
+            break;
+        case c_different_src_and_dstn_sizes:
+            write_message(stderr, msg_c_different_src_and_dstn_sizes, args);
+            break;
+        case c_ranges_dont_match:
+            write_message(stderr, msg_c_ranges_dont_match, args);
+            break;
+        case c_destination_is_over:
+            write_message(stderr, msg_c_destination_is_over, args);
+            break;
+        case c_slice_of_noncont_array:
+            write_message(stderr, msg_c_slice_of_noncont_array, args);
+            break;
+        case c_non_contiguous_dope_vector:
+            write_message(stderr, msg_c_non_contiguous_dope_vector, args);
+            break;
+        case c_pointer_array_mismatch:
+            write_message(stderr, msg_c_pointer_array_mismatch, args);
+            break;
+        case c_omp_invalid_device_num_env:
+            write_message(stderr, msg_c_omp_invalid_device_num_env, args);
+            break;
+        case c_omp_invalid_device_num:
+            write_message(stderr, msg_c_omp_invalid_device_num, args);
+            break;
+        case c_unknown_binary_type:
+            write_message(stderr, msg_c_unknown_binary_type, args);
+            break;
+        case c_multiple_target_exes:
+            write_message(stderr, msg_c_multiple_target_exes, args);
+            break;
+        case c_no_target_exe:
+            write_message(stderr, msg_c_no_target_exe, args);
+            break;
+        case c_report_unknown_timer_node:
+            write_message(stderr, msg_c_report_unknown_timer_node, args);
+            break;
+        case c_report_unknown_trace_node:
+            write_message(stderr, msg_c_report_unknown_trace_node, args);
+            break;
+    }
+    va_end(args);
+}
+
+char const * report_get_message_str(error_types input_tag)
+{
+    switch (input_tag) {
+        case c_report_title:
+            return (offload_get_message_str(msg_c_report_title));
+        case c_report_from_file:
+            return (offload_get_message_str(msg_c_report_from_file));
+        case c_report_offload:
+            return (offload_get_message_str(msg_c_report_offload));
+        case c_report_mic:
+            return (offload_get_message_str(msg_c_report_mic));
+        case c_report_file:
+            return (offload_get_message_str(msg_c_report_file));
+        case c_report_line:
+            return (offload_get_message_str(msg_c_report_line));
+        case c_report_host:
+            return (offload_get_message_str(msg_c_report_host));
+        case c_report_tag:
+            return (offload_get_message_str(msg_c_report_tag));
+        case c_report_cpu_time:
+            return (offload_get_message_str(msg_c_report_cpu_time));
+        case c_report_seconds:
+            return (offload_get_message_str(msg_c_report_seconds));
+        case c_report_cpu_to_mic_data:
+            return (offload_get_message_str(msg_c_report_cpu_to_mic_data));
+        case c_report_bytes:
+            return (offload_get_message_str(msg_c_report_bytes));
+        case c_report_mic_time:
+            return (offload_get_message_str(msg_c_report_mic_time));
+        case c_report_mic_to_cpu_data:
+            return (offload_get_message_str(msg_c_report_mic_to_cpu_data));
+        case c_report_compute:
+            return (offload_get_message_str(msg_c_report_compute));
+        case c_report_copyin_data:
+            return (offload_get_message_str(msg_c_report_copyin_data));
+        case c_report_copyout_data:
+            return (offload_get_message_str(msg_c_report_copyout_data));
+        case c_report_create_buf_host:
+            return (offload_get_message_str(c_report_create_buf_host));
+        case c_report_create_buf_mic:
+            return (offload_get_message_str(msg_c_report_create_buf_mic));
+        case c_report_destroy:
+            return (offload_get_message_str(msg_c_report_destroy));
+        case c_report_gather_copyin_data:
+            return (offload_get_message_str(msg_c_report_gather_copyin_data));
+        case c_report_gather_copyout_data:
+            return (offload_get_message_str(msg_c_report_gather_copyout_data));
+        case c_report_state_signal:
+            return (offload_get_message_str(msg_c_report_state_signal));
+        case c_report_signal:
+            return (offload_get_message_str(msg_c_report_signal));
+        case c_report_wait:
+            return (offload_get_message_str(msg_c_report_wait));
+        case c_report_init:
+            return (offload_get_message_str(msg_c_report_init));
+        case c_report_init_func:
+            return (offload_get_message_str(msg_c_report_init_func));
+        case c_report_logical_card:
+            return (offload_get_message_str(msg_c_report_logical_card));
+        case c_report_mic_myo_fptr:
+            return (offload_get_message_str(msg_c_report_mic_myo_fptr));
+        case c_report_mic_myo_shared:
+            return (offload_get_message_str(msg_c_report_mic_myo_shared));
+        case c_report_myoacquire:
+            return (offload_get_message_str(msg_c_report_myoacquire));
+        case c_report_myofini:
+            return (offload_get_message_str(msg_c_report_myofini));
+        case c_report_myoinit:
+            return (offload_get_message_str(msg_c_report_myoinit));
+        case c_report_myoregister:
+            return (offload_get_message_str(msg_c_report_myoregister));
+        case c_report_myorelease:
+            return (offload_get_message_str(msg_c_report_myorelease));
+        case c_report_myosharedalignedfree:
+            return (
+                offload_get_message_str(msg_c_report_myosharedalignedfree));
+        case c_report_myosharedalignedmalloc:
+            return (
+                offload_get_message_str(msg_c_report_myosharedalignedmalloc));
+        case c_report_myosharedfree:
+            return (offload_get_message_str(msg_c_report_myosharedfree));
+        case c_report_myosharedmalloc:
+            return (offload_get_message_str(msg_c_report_myosharedmalloc));
+        case c_report_physical_card:
+            return (offload_get_message_str(msg_c_report_physical_card));
+        case c_report_receive_pointer_data:
+            return (
+                offload_get_message_str(msg_c_report_receive_pointer_data));
+        case c_report_received_pointer_data:
+            return (
+                offload_get_message_str(msg_c_report_received_pointer_data));
+        case c_report_register:
+            return (offload_get_message_str(msg_c_report_register));
+        case c_report_scatter_copyin_data:
+            return (offload_get_message_str(msg_c_report_scatter_copyin_data));
+        case c_report_scatter_copyout_data:
+            return (
+                offload_get_message_str(msg_c_report_scatter_copyout_data));
+        case c_report_send_pointer_data:
+            return (offload_get_message_str(msg_c_report_send_pointer_data));
+        case c_report_sent_pointer_data:
+            return (offload_get_message_str(msg_c_report_sent_pointer_data));
+        case c_report_start:
+            return (offload_get_message_str(msg_c_report_start));
+        case c_report_start_target_func:
+            return (offload_get_message_str(msg_c_report_start_target_func));
+        case c_report_state:
+            return (offload_get_message_str(msg_c_report_state));
+        case c_report_unregister:
+            return (offload_get_message_str(msg_c_report_unregister));
+        case c_report_var:
+            return (offload_get_message_str(msg_c_report_var));
+
+        default:
+            LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
+            abort();
+    }
+}
+
+// Translate a host-side timer phase (an OffloadHostPhase value) into its
+// report label from the message catalog.  An unrecognised phase is reported
+// through LIBOFFLOAD_ERROR and the process is aborted.
+char const * report_get_host_stage_str(int i)
+{
+    int label;
+
+    switch (i) {
+        case c_offload_host_total_offload:
+            label = msg_c_report_host_total_offload_time;
+            break;
+        case c_offload_host_initialize:
+            label = msg_c_report_host_initialize;
+            break;
+        case c_offload_host_target_acquire:
+            label = msg_c_report_host_target_acquire;
+            break;
+        case c_offload_host_wait_deps:
+            label = msg_c_report_host_wait_deps;
+            break;
+        case c_offload_host_setup_buffers:
+            label = msg_c_report_host_setup_buffers;
+            break;
+        case c_offload_host_alloc_buffers:
+            label = msg_c_report_host_alloc_buffers;
+            break;
+        case c_offload_host_setup_misc_data:
+            label = msg_c_report_host_setup_misc_data;
+            break;
+        case c_offload_host_alloc_data_buffer:
+            label = msg_c_report_host_alloc_data_buffer;
+            break;
+        case c_offload_host_send_pointers:
+            label = msg_c_report_host_send_pointers;
+            break;
+        case c_offload_host_gather_inputs:
+            label = msg_c_report_host_gather_inputs;
+            break;
+        case c_offload_host_map_in_data_buffer:
+            label = msg_c_report_host_map_in_data_buffer;
+            break;
+        case c_offload_host_unmap_in_data_buffer:
+            label = msg_c_report_host_unmap_in_data_buffer;
+            break;
+        case c_offload_host_start_compute:
+            label = msg_c_report_host_start_compute;
+            break;
+        case c_offload_host_wait_compute:
+            label = msg_c_report_host_wait_compute;
+            break;
+        case c_offload_host_start_buffers_reads:
+            label = msg_c_report_host_start_buffers_reads;
+            break;
+        case c_offload_host_scatter_outputs:
+            label = msg_c_report_host_scatter_outputs;
+            break;
+        case c_offload_host_map_out_data_buffer:
+            label = msg_c_report_host_map_out_data_buffer;
+            break;
+        case c_offload_host_unmap_out_data_buffer:
+            label = msg_c_report_host_unmap_out_data_buffer;
+            break;
+        case c_offload_host_wait_buffers_reads:
+            label = msg_c_report_host_wait_buffers_reads;
+            break;
+        case c_offload_host_destroy_buffers:
+            label = msg_c_report_host_destroy_buffers;
+            break;
+        default:
+            // Unknown phase: report it and terminate; nothing is returned.
+            LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+            abort();
+    }
+
+    return (offload_get_message_str(label));
+}
+
+// Translate a target-side timer phase (an OffloadTargetPhase value) into its
+// report label from the message catalog.  An unrecognised phase is reported
+// through LIBOFFLOAD_ERROR and the process is aborted.
+char const * report_get_target_stage_str(int i)
+{
+    int label;
+
+    switch (i) {
+        case c_offload_target_total_time:
+            label = msg_c_report_target_total_time;
+            break;
+        case c_offload_target_descriptor_setup:
+            label = msg_c_report_target_descriptor_setup;
+            break;
+        case c_offload_target_func_lookup:
+            label = msg_c_report_target_func_lookup;
+            break;
+        case c_offload_target_func_time:
+            label = msg_c_report_target_func_time;
+            break;
+        case c_offload_target_scatter_inputs:
+            label = msg_c_report_target_scatter_inputs;
+            break;
+        case c_offload_target_add_buffer_refs:
+            label = msg_c_report_target_add_buffer_refs;
+            break;
+        case c_offload_target_compute:
+            label = msg_c_report_target_compute;
+            break;
+        case c_offload_target_gather_outputs:
+            label = msg_c_report_target_gather_outputs;
+            break;
+        case c_offload_target_release_buffer_refs:
+            label = msg_c_report_target_release_buffer_refs;
+            break;
+        default:
+            // Unknown phase: report it and terminate; nothing is returned.
+            LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+            abort();
+    }
+
+    return (offload_get_message_str(label));
+}

Added: openmp/trunk/offload/src/liboffload_error_codes.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/liboffload_error_codes.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/liboffload_error_codes.h (added)
+++ openmp/trunk/offload/src/liboffload_error_codes.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,276 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if !defined(LIBOFFLOAD_ERROR_CODES_H)
+#define LIBOFFLOAD_ERROR_CODES_H
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+/*
+ * Tags accepted as the first argument of __liboffload_error_support() and
+ * __liboffload_report_support(), and by report_get_message_str().
+ * Numbering starts at 0 and every later enumerator is one greater than the
+ * one before it.
+ *
+ * These values are NOT message-table indices.  The msg_c_* enumerators in
+ * liboffload_msg.h start at 1 and list their entries in a different order
+ * (c_offload1 is 2 here, msg_c_offload1 is 6 there), so an error_types value
+ * must be translated to its msg_c_* counterpart before it is given to
+ * offload_get_message_str().
+ */
+typedef enum
+{
+    c_device_is_not_available = 0,
+    c_invalid_device_number,
+    c_offload1,
+    c_unknown_var_type,
+    c_send_func_ptr,
+    c_receive_func_ptr,
+    c_offload_malloc,
+    c_invalid_env_var_value,
+    c_invalid_env_var_int_value,
+    c_invalid_env_report_value,
+    c_offload_signaled1,
+    c_offload_signaled2,
+    c_myotarget_checkresult,
+    c_myowrapper_checkresult,
+    c_offload_descriptor_offload,
+    c_merge_var_descs1,
+    c_merge_var_descs2,
+    c_mic_parse_env_var_list1,
+    c_mic_parse_env_var_list2,
+    c_mic_process_exit_ret,
+    c_mic_process_exit_sig,
+    c_mic_process_exit,
+    c_mic_init3,
+    c_mic_init4,
+    c_mic_init5,
+    c_mic_init6,
+    c_no_static_var_data,
+    c_no_ptr_data,
+    c_get_engine_handle,
+    c_get_engine_index,
+    c_process_create,
+    c_process_get_func_handles,
+    c_process_wait_shutdown,
+    c_process_proxy_flush,
+    c_load_library,
+    c_pipeline_create,
+    c_pipeline_run_func,
+    c_pipeline_start_run_funcs,
+    c_buf_create,
+    c_buf_create_out_of_mem,
+    c_buf_create_from_mem,
+    c_buf_destroy,
+    c_buf_map,
+    c_buf_unmap,
+    c_buf_read,
+    c_buf_write,
+    c_buf_copy,
+    c_buf_get_address,
+    c_buf_add_ref,
+    c_buf_release_ref,
+    c_buf_set_state,
+    c_event_wait,
+    c_zero_or_neg_ptr_len,
+    c_zero_or_neg_transfer_size,
+    c_bad_ptr_mem_range,
+    c_different_src_and_dstn_sizes,
+    c_ranges_dont_match,
+    c_destination_is_over,
+    c_slice_of_noncont_array,
+    c_non_contiguous_dope_vector,
+    c_pointer_array_mismatch,
+    c_omp_invalid_device_num_env,
+    c_omp_invalid_device_num,
+    c_unknown_binary_type,
+    c_multiple_target_exes,
+    c_no_target_exe,
+    c_report_host,
+    c_report_target,
+    c_report_title,
+    c_report_from_file,
+    c_report_file,
+    c_report_line,
+    c_report_tag,
+    c_report_seconds,
+    c_report_bytes,
+    c_report_mic,
+    c_report_cpu_time,
+    c_report_cpu_to_mic_data,
+    c_report_mic_time,
+    c_report_mic_to_cpu_data,
+    c_report_unknown_timer_node,
+    c_report_unknown_trace_node,
+    c_report_offload,
+    c_report_w_tag,
+    c_report_state,
+    c_report_start,
+    c_report_init,
+    c_report_logical_card,
+    c_report_physical_card,
+    c_report_register,
+    c_report_init_func,
+    c_report_create_buf_host,
+    c_report_create_buf_mic,
+    c_report_send_pointer_data,
+    c_report_sent_pointer_data,
+    c_report_gather_copyin_data,
+    c_report_copyin_data,
+    c_report_state_signal,
+    c_report_signal,
+    c_report_wait,
+    c_report_compute,
+    c_report_receive_pointer_data,
+    c_report_received_pointer_data,
+    c_report_start_target_func,
+    c_report_var,
+    c_report_scatter_copyin_data,
+    c_report_gather_copyout_data,
+    c_report_scatter_copyout_data,
+    c_report_copyout_data,
+    c_report_unregister,
+    c_report_destroy,
+    c_report_myoinit,
+    c_report_myoregister,
+    c_report_myofini,
+    c_report_mic_myo_shared,
+    c_report_mic_myo_fptr,
+    c_report_myosharedmalloc,
+    c_report_myosharedfree,
+    c_report_myosharedalignedmalloc,
+    c_report_myosharedalignedfree,
+    c_report_myoacquire,
+    c_report_myorelease,
+    c_coipipe_max_number
+} error_types;
+/*
+ * Host-side timer phases, numbered consecutively from 0.
+ * report_get_host_stage_str() maps every phase below except
+ * c_offload_host_max_phase to its report label.
+ */
+enum OffloadHostPhase {
+    // Total time on host for entire offload
+    c_offload_host_total_offload = 0,
+
+    // Time to load target binary
+    c_offload_host_initialize,
+
+    // Time to acquire lrb availability dynamically
+    c_offload_host_target_acquire,
+
+    // Time to wait for dependencies
+    c_offload_host_wait_deps,
+
+    // Time to allocate pointer buffers, initiate writes for pointers
+    // and calculate size of copyin/copyout buffer
+    c_offload_host_setup_buffers,
+
+    // Time to allocate pointer buffers
+    c_offload_host_alloc_buffers,
+
+    // Time to initialize misc data
+    c_offload_host_setup_misc_data,
+
+    // Time to allocate copyin/copyout buffer
+    c_offload_host_alloc_data_buffer,
+
+    // Time to initiate writes from host pointers to buffers
+    c_offload_host_send_pointers,
+
+    // Time to gather IN data of offload into buffer
+    c_offload_host_gather_inputs,
+
+    // Time to map buffer
+    c_offload_host_map_in_data_buffer,
+
+    // Time to unmap buffer
+    c_offload_host_unmap_in_data_buffer,
+
+    // Time to start remote function call that does computation on lrb
+    c_offload_host_start_compute,
+
+    // Time to wait for compute to finish
+    c_offload_host_wait_compute,
+
+    // Time to initiate reads from pointer buffers
+    c_offload_host_start_buffers_reads,
+
+    // Time to update host variables with OUT data from buffer
+    c_offload_host_scatter_outputs,
+
+    // Time to map buffer
+    c_offload_host_map_out_data_buffer,
+
+    // Time to unmap buffer
+    c_offload_host_unmap_out_data_buffer,
+
+    // Time to wait for reads from buffers to finish
+    c_offload_host_wait_buffers_reads,
+
+    // Time to destroy buffers that are no longer needed
+    c_offload_host_destroy_buffers,
+
+    // LAST TIME MONITOR: equals the number of host phases listed above
+    c_offload_host_max_phase
+};
+/*
+ * Target-side timer phases, numbered consecutively from 0.
+ * report_get_target_stage_str() maps every phase below except
+ * c_offload_target_max_phase to its report label.
+ */
+enum OffloadTargetPhase {
+    // Total time spent on the target
+    c_offload_target_total_time = 0,
+
+    // Time to initialize offload descriptor
+    c_offload_target_descriptor_setup,
+
+    // Time to find target entry point in lookup table
+    c_offload_target_func_lookup,
+
+    // Total time spent executing offload entry
+    c_offload_target_func_time,
+
+    // Time to initialize target variables with IN values from buffer
+    c_offload_target_scatter_inputs,
+
+    // Time to add buffer reference for pointer buffers
+    c_offload_target_add_buffer_refs,
+
+    // Total time on lrb for computation
+    c_offload_target_compute,
+
+    // On lrb, time to copy OUT into buffer
+    c_offload_target_gather_outputs,
+
+    // Time to release buffer references
+    c_offload_target_release_buffer_refs,
+
+    // LAST TIME MONITOR: equals the number of target phases listed above
+    c_offload_target_max_phase
+};
+/*
+ * C-linkage entry points of the error/report support code.
+ *
+ *  - __liboffload_error_support / __liboffload_report_support take an
+ *    error_types tag followed by variadic arguments.  Their definitions are
+ *    not in this header; presumably the extra arguments fill the
+ *    printf-style directives of the selected message -- confirm against the
+ *    definitions.
+ *  - offload_get_message_str takes a message code and returns the matching
+ *    message string.
+ *  - report_get_message_str, report_get_host_stage_str and
+ *    report_get_target_stage_str return the report label for an error_types
+ *    tag, an OffloadHostPhase value and an OffloadTargetPhase value
+ *    respectively; each reports an error and aborts on a value it does not
+ *    know.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+void __liboffload_error_support(error_types input_tag, ...);
+void __liboffload_report_support(error_types input_tag, ...);
+char const *offload_get_message_str(int msgCode);
+char const * report_get_message_str(error_types input_tag);
+char const * report_get_host_stage_str(int i);
+char const * report_get_target_stage_str(int i);
+#ifdef __cplusplus
+}
+#endif
+
+#define test_msg_cat(nm, msg) \
+    fprintf(stderr, "\t TEST for %s \n \t", nm); \
+    __liboffload_error_support(msg);
+
+#define test_msg_cat1(nm, msg, ...) \
+    fprintf(stderr, "\t TEST for %s \n \t", nm); \
+    __liboffload_error_support(msg, __VA_ARGS__);
+
+void write_message(FILE * file, int msgCode, va_list args_p);
+
+#define LIBOFFLOAD_ERROR __liboffload_error_support
+
+#ifdef TARGET_WINNT
+#define LIBOFFLOAD_ABORT \
+         _set_abort_behavior(0, _WRITE_ABORT_MSG); \
+         abort()
+#else
+#define LIBOFFLOAD_ABORT \
+         abort()
+#endif
+
+#endif // !defined(LIBOFFLOAD_ERROR_CODES_H)

Added: openmp/trunk/offload/src/liboffload_msg.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/liboffload_msg.c?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/liboffload_msg.c (added)
+++ openmp/trunk/offload/src/liboffload_msg.c Wed Apr  9 10:40:23 2014
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include <stdio.h>
+
+// ===========================================================================
+// Bring in the static string table and the enumerations for indexing into
+// it.
+// ===========================================================================
+
+#include "liboffload_msg.h"
+/* Write __message_text__ to stderr with fputs(); not referenced anywhere else in liboffload_msg.c. */
+# define DYNART_STDERR_PUTS(__message_text__) fputs((__message_text__),stderr)
+
+// ===========================================================================
+// Now the code for accessing the message catalogs
+// ===========================================================================
+
+
+    // Emit the catalog entry for msgCode to `file` verbatim (any '%'
+    // directives in the text are written as-is) and flush the stream.
+    // msgCode is used as a raw index into MESSAGE_TABLE_NAME.
+    void write_message(FILE * file, int msgCode) {
+        char const *text = MESSAGE_TABLE_NAME[ msgCode ];
+
+        fputs(text, file);
+        fflush(file);
+    }
+
+    // Return the catalog string stored at index msgCode of
+    // MESSAGE_TABLE_NAME.  No range check is performed on msgCode.
+    char const *offload_get_message_str(int msgCode) {
+        char const *entry = MESSAGE_TABLE_NAME[ msgCode ];
+
+        return entry;
+    }

Added: openmp/trunk/offload/src/liboffload_msg.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/liboffload_msg.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/liboffload_msg.h (added)
+++ openmp/trunk/offload/src/liboffload_msg.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,326 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+/* file: liboffload_msg.h
+ *
+ * Indices into the MESSAGE_TABLE_NAME string table defined further down in
+ * this header; the table entries are annotated with these enumerator names
+ * and values.  __dummy__ (0) matches the table's "Un-used message" slot, and
+ * the real messages start at firstMsg (1), numbered consecutively in
+ * declaration order.
+ *
+ * The ordering differs from error_types in liboffload_error_codes.h, so the
+ * two sets of values are not interchangeable.
+ *
+ * NOTE(review): counting from __dummy__ = 0, the last enumerator before
+ * lastMsg (msg_c_pointer_array_mismatch) evaluates to 151 while lastMsg is
+ * 152 -- confirm whether lastMsg is meant to be one past the last message.
+ */
+enum {
+	__dummy__ = 0,
+	msg_c_device_is_not_available,
+	msg_c_invalid_device_number,
+	msg_c_send_func_ptr,
+	msg_c_receive_func_ptr,
+	msg_c_offload_malloc,
+	msg_c_offload1,
+	msg_c_unknown_var_type,
+	msg_c_invalid_env_var_value,
+	msg_c_invalid_env_var_int_value,
+	msg_c_invalid_env_report_value,
+	msg_c_offload_signaled1,
+	msg_c_offload_signaled2,
+	msg_c_myowrapper_checkresult,
+	msg_c_myotarget_checkresult,
+	msg_c_offload_descriptor_offload,
+	msg_c_merge_var_descs1,
+	msg_c_merge_var_descs2,
+	msg_c_mic_parse_env_var_list1,
+	msg_c_mic_parse_env_var_list2,
+	msg_c_mic_process_exit_ret,
+	msg_c_mic_process_exit_sig,
+	msg_c_mic_process_exit,
+	msg_c_mic_init3,
+	msg_c_mic_init4,
+	msg_c_mic_init5,
+	msg_c_mic_init6,
+	msg_c_no_static_var_data,
+	msg_c_no_ptr_data,
+	msg_c_get_engine_handle,
+	msg_c_get_engine_index,
+	msg_c_process_create,
+	msg_c_process_get_func_handles,
+	msg_c_process_wait_shutdown,
+	msg_c_process_proxy_flush,
+	msg_c_load_library,
+	msg_c_pipeline_create,
+	msg_c_pipeline_run_func,
+	msg_c_pipeline_start_run_funcs,
+	msg_c_buf_create,
+	msg_c_buf_create_out_of_mem,
+	msg_c_buf_create_from_mem,
+	msg_c_buf_destroy,
+	msg_c_buf_map,
+	msg_c_buf_unmap,
+	msg_c_buf_read,
+	msg_c_buf_write,
+	msg_c_buf_copy,
+	msg_c_buf_get_address,
+	msg_c_buf_add_ref,
+	msg_c_buf_release_ref,
+	msg_c_buf_set_state,
+	msg_c_event_wait,
+	msg_c_zero_or_neg_ptr_len,
+	msg_c_zero_or_neg_transfer_size,
+	msg_c_bad_ptr_mem_range,
+	msg_c_different_src_and_dstn_sizes,
+	msg_c_non_contiguous_dope_vector,
+	msg_c_omp_invalid_device_num_env,
+	msg_c_omp_invalid_device_num,
+	msg_c_unknown_binary_type,
+	msg_c_multiple_target_exes,
+	msg_c_no_target_exe,
+	msg_c_report_unknown_timer_node,
+	msg_c_report_unknown_trace_node,
+	msg_c_report_host,
+	msg_c_report_mic,
+	msg_c_report_title,
+	msg_c_report_seconds,
+	msg_c_report_bytes,
+	msg_c_report_cpu_time,
+	msg_c_report_mic_time,
+	msg_c_report_tag,
+	msg_c_report_from_file,
+	msg_c_report_file,
+	msg_c_report_line,
+	msg_c_report_cpu_to_mic_data,
+	msg_c_report_mic_to_cpu_data,
+	msg_c_report_offload,
+	msg_c_report_w_tag,
+	msg_c_report_state,
+	msg_c_report_start,
+	msg_c_report_init,
+	msg_c_report_logical_card,
+	msg_c_report_physical_card,
+	msg_c_report_register,
+	msg_c_report_init_func,
+	msg_c_report_create_buf_host,
+	msg_c_report_create_buf_mic,
+	msg_c_report_send_pointer_data,
+	msg_c_report_sent_pointer_data,
+	msg_c_report_gather_copyin_data,
+	msg_c_report_copyin_data,
+	msg_c_report_state_signal,
+	msg_c_report_signal,
+	msg_c_report_wait,
+	msg_c_report_compute,
+	msg_c_report_receive_pointer_data,
+	msg_c_report_received_pointer_data,
+	msg_c_report_start_target_func,
+	msg_c_report_var,
+	msg_c_report_scatter_copyin_data,
+	msg_c_report_gather_copyout_data,
+	msg_c_report_scatter_copyout_data,
+	msg_c_report_copyout_data,
+	msg_c_report_unregister,
+	msg_c_report_destroy,
+	msg_c_report_myoinit,
+	msg_c_report_myoregister,
+	msg_c_report_myofini,
+	msg_c_report_mic_myo_shared,
+	msg_c_report_mic_myo_fptr,
+	msg_c_report_myosharedmalloc,
+	msg_c_report_myosharedfree,
+	msg_c_report_myosharedalignedmalloc,
+	msg_c_report_myosharedalignedfree,
+	msg_c_report_myoacquire,
+	msg_c_report_myorelease,
+	msg_c_report_host_total_offload_time,
+	msg_c_report_host_initialize,
+	msg_c_report_host_target_acquire,
+	msg_c_report_host_wait_deps,
+	msg_c_report_host_setup_buffers,
+	msg_c_report_host_alloc_buffers,
+	msg_c_report_host_setup_misc_data,
+	msg_c_report_host_alloc_data_buffer,
+	msg_c_report_host_send_pointers,
+	msg_c_report_host_gather_inputs,
+	msg_c_report_host_map_in_data_buffer,
+	msg_c_report_host_unmap_in_data_buffer,
+	msg_c_report_host_start_compute,
+	msg_c_report_host_wait_compute,
+	msg_c_report_host_start_buffers_reads,
+	msg_c_report_host_scatter_outputs,
+	msg_c_report_host_map_out_data_buffer,
+	msg_c_report_host_unmap_out_data_buffer,
+	msg_c_report_host_wait_buffers_reads,
+	msg_c_report_host_destroy_buffers,
+	msg_c_report_target_total_time,
+	msg_c_report_target_descriptor_setup,
+	msg_c_report_target_func_lookup,
+	msg_c_report_target_func_time,
+	msg_c_report_target_scatter_inputs,
+	msg_c_report_target_add_buffer_refs,
+	msg_c_report_target_compute,
+	msg_c_report_target_gather_outputs,
+	msg_c_report_target_release_buffer_refs,
+	msg_c_coi_pipeline_max_number,
+	msg_c_ranges_dont_match,
+	msg_c_destination_is_over,
+	msg_c_slice_of_noncont_array,
+	msg_c_pointer_array_mismatch,
+	lastMsg = 152,
+	firstMsg = 1
+};
+
+/* Identifier of the message string table; a definition of
+ * MESSAGE_TABLE_NAME made before this header is included takes precedence
+ * over the default below. */
+#if !defined(MESSAGE_TABLE_NAME)
+#    define MESSAGE_TABLE_NAME __liboffload_message_table
+#endif
+
+static char const * MESSAGE_TABLE_NAME[] = {
+	/*   0 __dummy__                               */ "Un-used message",
+	/*   1 msg_c_device_is_not_available           */ "offload error: cannot offload to MIC - device is not available",
+	/*   2 msg_c_invalid_device_number             */ "offload error: expected a number greater than or equal to -1",
+	/*   3 msg_c_send_func_ptr                     */ "offload error: cannot find function name for address %p",
+	/*   4 msg_c_receive_func_ptr                  */ "offload error: cannot find address of function %s",
+	/*   5 msg_c_offload_malloc                    */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
+	/*   6 msg_c_offload1                          */ "offload error: device %d does not have a pending signal for wait(%p)",
+	/*   7 msg_c_unknown_var_type                  */ "offload error: unknown variable type %d",
+	/*   8 msg_c_invalid_env_var_value             */ "offload warning: ignoring invalid value specified for %s",
+	/*   9 msg_c_invalid_env_var_int_value         */ "offload warning: specify an integer value for %s",
+	/*  10 msg_c_invalid_env_report_value          */ "offload warning: ignoring %s setting; use a value in range 1-3",
+	/*  11 msg_c_offload_signaled1                 */ "offload error: invalid device number %d specified in _Offload_signaled",
+	/*  12 msg_c_offload_signaled2                 */ "offload error: invalid signal %p specified for _Offload_signaled",
+	/*  13 msg_c_myowrapper_checkresult            */ "offload error: %s failed with error %d",
+	/*  14 msg_c_myotarget_checkresult             */ "offload error: %s failed with error %d",
+	/*  15 msg_c_offload_descriptor_offload        */ "offload error: cannot find offload entry %s",
+	/*  16 msg_c_merge_var_descs1                  */ "offload error: unexpected number of variable descriptors",
+	/*  17 msg_c_merge_var_descs2                  */ "offload error: unexpected variable type",
+	/*  18 msg_c_mic_parse_env_var_list1           */ "offload_error: MIC environment variable must begin with an alpabetic character",
+	/*  19 msg_c_mic_parse_env_var_list2           */ "offload_error: MIC environment variable value must be specified with ‘=’",
+	/*  20 msg_c_mic_process_exit_ret              */ "offload error: process on the device %d unexpectedly exited with code %d",
+	/*  21 msg_c_mic_process_exit_sig              */ "offload error: process on the device %d was terminated by signal %d (%s)",
+	/*  22 msg_c_mic_process_exit                  */ "offload error: process on the device %d was unexpectedly terminated",
+	/*  23 msg_c_mic_init3                         */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
+	/*  24 msg_c_mic_init4                         */ "offload error: thread key create failed with error %d",
+	/*  25 msg_c_mic_init5                         */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
+	/*  26 msg_c_mic_init6                         */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
+	/*  27 msg_c_no_static_var_data                */ "offload error: cannot find data associated with statically allocated variable %p",
+	/*  28 msg_c_no_ptr_data                       */ "offload error: cannot find data associated with pointer variable %p",
+	/*  29 msg_c_get_engine_handle                 */ "offload error: cannot get device %d handle (error code %d)",
+	/*  30 msg_c_get_engine_index                  */ "offload error: cannot get physical index for logical device %d (error code %d)",
+	/*  31 msg_c_process_create                    */ "offload error: cannot start process on the device %d (error code %d)",
+	/*  32 msg_c_process_get_func_handles          */ "offload error: cannot get function handles on the device %d (error code %d)",
+	/*  33 msg_c_process_wait_shutdown             */ "offload error: wait for process shutdown failed on device %d (error code %d)",
+	/*  34 msg_c_process_proxy_flush               */ "offload error: cannot flush process output on device %d (error code %d)",
+	/*  35 msg_c_load_library                      */ "offload error: cannot load library to the device %d (error code %d)",
+	/*  36 msg_c_pipeline_create                   */ "offload error: cannot create pipeline on the device %d (error code %d)",
+	/*  37 msg_c_pipeline_run_func                 */ "offload error: cannot execute function on the device %d (error code %d)",
+	/*  38 msg_c_pipeline_start_run_funcs          */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
+	/*  39 msg_c_buf_create                        */ "offload error: cannot create buffer on device %d (error code %d)",
+	/*  40 msg_c_buf_create_out_of_mem             */ "offload error: cannot create buffer on device %d, out of memory",
+	/*  41 msg_c_buf_create_from_mem               */ "offload error: cannot create buffer from memory on device %d (error code %d)",
+	/*  42 msg_c_buf_destroy                       */ "offload error: buffer destroy failed (error code %d)",
+	/*  43 msg_c_buf_map                           */ "offload error: buffer map failed (error code %d)",
+	/*  44 msg_c_buf_unmap                         */ "offload error: buffer unmap failed (error code %d)",
+	/*  45 msg_c_buf_read                          */ "offload error: buffer read failed (error code %d)",
+	/*  46 msg_c_buf_write                         */ "offload error: buffer write failed (error code %d)",
+	/*  47 msg_c_buf_copy                          */ "offload error: buffer copy failed (error code %d)",
+	/*  48 msg_c_buf_get_address                   */ "offload error: cannot get buffer address on device %d (error code %d)",
+	/*  49 msg_c_buf_add_ref                       */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
+	/*  50 msg_c_buf_release_ref                   */ "offload error: cannot release buffer memory on device %d (error code %d)",
+	/*  51 msg_c_buf_set_state                     */ "offload error: buffer set state failed (error code %d)",
+	/*  52 msg_c_event_wait                        */ "offload error: wait for event to become signaled failed (error code %d)",
+	/*  53 msg_c_zero_or_neg_ptr_len               */ "offload error: memory allocation of negative length is not supported",
+	/*  54 msg_c_zero_or_neg_transfer_size         */ "offload error: data transfer of zero or negative size is not supported",
+	/*  55 msg_c_bad_ptr_mem_range                 */ "offload error: address range partially overlaps with existing allocation",
+	/*  56 msg_c_different_src_and_dstn_sizes      */ "offload error: size of the source %d differs from size of the destination %d",
+	/*  57 msg_c_non_contiguous_dope_vector        */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
+	/*  58 msg_c_omp_invalid_device_num_env        */ "offload warning: ignoring %s setting; use a non-negative integer value",
+	/*  59 msg_c_omp_invalid_device_num            */ "offload error: device number should be a non-negative integer value",
+	/*  60 msg_c_unknown_binary_type               */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
+	/*  61 msg_c_multiple_target_exes              */ "offload error: more that one target executable found",
+	/*  62 msg_c_no_target_exe                     */ "offload error: target executable is not available",
+	/*  63 msg_c_report_unknown_timer_node         */ "offload error: unknown timer node",
+	/*  64 msg_c_report_unknown_trace_node         */ "offload error: unknown trace node",
+	/*  65 msg_c_report_host                       */ "HOST",
+	/*  66 msg_c_report_mic                        */ "MIC",
+	/*  67 msg_c_report_title                      */ "timer data       (sec)",
+	/*  68 msg_c_report_seconds                    */ "(seconds)",
+	/*  69 msg_c_report_bytes                      */ "(bytes)",
+	/*  70 msg_c_report_cpu_time                   */ "CPU Time",
+	/*  71 msg_c_report_mic_time                   */ "MIC Time",
+	/*  72 msg_c_report_tag                        */ "Tag",
+	/*  73 msg_c_report_from_file                  */ "Offload from file",
+	/*  74 msg_c_report_file                       */ "File",
+	/*  75 msg_c_report_line                       */ "Line",
+	/*  76 msg_c_report_cpu_to_mic_data            */ "CPU->MIC Data",
+	/*  77 msg_c_report_mic_to_cpu_data            */ "MIC->CPU Data",
+	/*  78 msg_c_report_offload                    */ "Offload",
+	/*  79 msg_c_report_w_tag                      */ "Tag %d",
+	/*  80 msg_c_report_state                      */ "State",
+	/*  81 msg_c_report_start                      */ "Start target",
+	/*  82 msg_c_report_init                       */ "Initialize",
+	/*  83 msg_c_report_logical_card               */ "logical card",
+	/*  84 msg_c_report_physical_card              */ "physical card",
+	/*  85 msg_c_report_register                   */ "Register static data tables",
+	/*  86 msg_c_report_init_func                  */ "Setup target entry",
+	/*  87 msg_c_report_create_buf_host            */ "Create host buffer",
+	/*  88 msg_c_report_create_buf_mic             */ "Create target buffer",
+	/*  89 msg_c_report_send_pointer_data          */ "Send pointer data",
+	/*  90 msg_c_report_sent_pointer_data          */ "Host->target pointer data",
+	/*  91 msg_c_report_gather_copyin_data         */ "Gather copyin data",
+	/*  92 msg_c_report_copyin_data                */ "Host->target copyin data",
+	/*  93 msg_c_report_state_signal               */ "Signal",
+	/*  94 msg_c_report_signal                     */ "signal :",
+	/*  95 msg_c_report_wait                       */ "waits  :",
+	/*  96 msg_c_report_compute                    */ "Execute task on target",
+	/*  97 msg_c_report_receive_pointer_data       */ "Receive pointer data",
+	/*  98 msg_c_report_received_pointer_data      */ "Target->host pointer data",
+	/*  99 msg_c_report_start_target_func          */ "Start target entry",
+	/* 100 msg_c_report_var                        */ "Var",
+	/* 101 msg_c_report_scatter_copyin_data        */ "Scatter copyin data",
+	/* 102 msg_c_report_gather_copyout_data        */ "Gather copyout data",
+	/* 103 msg_c_report_scatter_copyout_data       */ "Scatter copyout data",
+	/* 104 msg_c_report_copyout_data               */ "Target->host copyout data",
+	/* 105 msg_c_report_unregister                 */ "Unregister data tables",
+	/* 106 msg_c_report_destroy                    */ "Destroy",
+	/* 107 msg_c_report_myoinit                    */ "Initialize MYO",
+	/* 108 msg_c_report_myoregister                */ "Register MYO tables",
+	/* 109 msg_c_report_myofini                    */ "Finalize MYO",
+	/* 110 msg_c_report_mic_myo_shared             */ "MIC MYO shared table register",
+	/* 111 msg_c_report_mic_myo_fptr               */ "MIC MYO fptr table register",
+	/* 112 msg_c_report_myosharedmalloc            */ "MYO shared malloc",
+	/* 113 msg_c_report_myosharedfree              */ "MYO shared free",
+	/* 114 msg_c_report_myosharedalignedmalloc     */ "MYO shared aligned malloc",
+	/* 115 msg_c_report_myosharedalignedfree       */ "MYO shared aligned free",
+	/* 116 msg_c_report_myoacquire                 */ "MYO acquire",
+	/* 117 msg_c_report_myorelease                 */ "MYO release",
+	/* 118 msg_c_report_host_total_offload_time    */ "host: total offload time",
+	/* 119 msg_c_report_host_initialize            */ "host: initialize target",
+	/* 120 msg_c_report_host_target_acquire        */ "host: acquire target",
+	/* 121 msg_c_report_host_wait_deps             */ "host: wait dependencies",
+	/* 122 msg_c_report_host_setup_buffers         */ "host: setup buffers",
+	/* 123 msg_c_report_host_alloc_buffers         */ "host: allocate buffers",
+	/* 124 msg_c_report_host_setup_misc_data       */ "host: setup misc_data",
+	/* 125 msg_c_report_host_alloc_data_buffer     */ "host: allocate buffer",
+	/* 126 msg_c_report_host_send_pointers         */ "host: send pointers",
+	/* 127 msg_c_report_host_gather_inputs         */ "host: gather inputs",
+	/* 128 msg_c_report_host_map_in_data_buffer    */ "host: map IN data buffer",
+	/* 129 msg_c_report_host_unmap_in_data_buffer  */ "host: unmap IN data buffer",
+	/* 130 msg_c_report_host_start_compute         */ "host: initiate compute",
+	/* 131 msg_c_report_host_wait_compute          */ "host: wait compute",
+	/* 132 msg_c_report_host_start_buffers_reads   */ "host: initiate pointer reads",
+	/* 133 msg_c_report_host_scatter_outputs       */ "host: scatter outputs",
+	/* 134 msg_c_report_host_map_out_data_buffer   */ "host: map OUT data buffer",
+	/* 135 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
+	/* 136 msg_c_report_host_wait_buffers_reads    */ "host: wait pointer reads",
+	/* 137 msg_c_report_host_destroy_buffers       */ "host: destroy buffers",
+	/* 138 msg_c_report_target_total_time          */ "target: total time",
+	/* 139 msg_c_report_target_descriptor_setup    */ "target: setup offload descriptor",
+	/* 140 msg_c_report_target_func_lookup         */ "target: entry lookup",
+	/* 141 msg_c_report_target_func_time           */ "target: entry time",
+	/* 142 msg_c_report_target_scatter_inputs      */ "target: scatter inputs",
+	/* 143 msg_c_report_target_add_buffer_refs     */ "target: add buffer reference",
+	/* 144 msg_c_report_target_compute             */ "target: compute",
+	/* 145 msg_c_report_target_gather_outputs      */ "target: gather outputs",
+	/* 146 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
+	/* 147 msg_c_coi_pipeline_max_number           */ "number of host threads doing offload exceeds maximum of %d",
+	/* 148 msg_c_ranges_dont_match                 */ "ranges of source and destination don't match together",
+	/* 149 msg_c_destination_is_over               */ "insufficient destination memory to transfer source",
+	/* 150 msg_c_slice_of_noncont_array            */ "a non-contiguous slice may be taken of contiguous arrays only",
+	/* 151 msg_c_pointer_array_mismatch            */ "number of %s elements is less than described by the source",
+};

Added: openmp/trunk/offload/src/mic_lib.f90
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/mic_lib.f90?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/mic_lib.f90 (added)
+++ openmp/trunk/offload/src/mic_lib.f90 Wed Apr  9 10:40:23 2014
@@ -0,0 +1,441 @@
+!
+!//===----------------------------------------------------------------------===//
+!//
+!//                     The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+
+! **********************************************************************************
+! * This file is intended to support the Intel(r) Many Integrated Core Architecture.
+! **********************************************************************************
+! free form Fortran source - should be named .f90
+! lines are longer than 72 characters
+
+! Fortran-side declarations for the offload API: target constants, offload
+! status codes, the offload_status type, and bind(C) interfaces to the
+! _Offload_* and *_target C entry points.
+module mic_lib
+use, intrinsic :: iso_c_binding
+
+! Require an explicit declaration for every module-level entity.
+implicit none
+
+! Target type code for MIC. Same value as TARGET_MIC in the C TARGET_TYPE enum
+! of offload.h (TARGET_NONE = 0, TARGET_HOST = 1, TARGET_MIC = 2).
+integer, parameter:: target_mic=2
+! Counterparts of DEFAULT_TARGET_TYPE and DEFAULT_TARGET_NUMBER in offload.h.
+integer, parameter:: default_target_type=target_mic
+integer, parameter:: default_target_number=0
+
+! Offload status codes. The values are spelled out so that the correspondence
+! with the C _Offload_result enum in offload.h is visible at a glance.
+enum, bind(C)
+    enumerator :: OFFLOAD_SUCCESS       = 0
+    enumerator :: OFFLOAD_DISABLED      = 1   ! offload is disabled
+    enumerator :: OFFLOAD_UNAVAILABLE   = 2   ! card is not available
+    enumerator :: OFFLOAD_OUT_OF_MEMORY = 3   ! not enough memory on device
+    enumerator :: OFFLOAD_PROCESS_DIED  = 4   ! target process has died
+    enumerator :: OFFLOAD_ERROR         = 5   ! unspecified error
+end enum
+
+! Status record of an offload. The component defaults correspond to
+! OFFLOAD_STATUS_INITIALIZER in offload.h: { OFFLOAD_DISABLED, -1, 0, 0 }.
+type, bind (C) :: offload_status
+    integer(c_int)    :: result        = OFFLOAD_DISABLED   ! status code, see the enum above
+    integer(c_int)    :: device_number = -1                 ! device number
+    integer(c_size_t) :: data_sent     = 0                  ! number of bytes sent to the target
+    integer(c_size_t) :: data_received = 0                  ! number of bytes received by host
+end type offload_status
+
+interface
+! Binding to the C routine _Offload_number_of_devices(); takes no arguments
+! and returns a C int.
+function offload_number_of_devices ()                                  &
+           bind (C, name = "_Offload_number_of_devices")
+!dec$ attributes default :: offload_number_of_devices
+  import :: c_int
+  integer (kind=c_int)        :: offload_number_of_devices
+!dec$ attributes offload:mic :: offload_number_of_devices
+!dir$ attributes known_intrinsic ::  offload_number_of_devices
+end function offload_number_of_devices
+
+! Binding to the C routine _Offload_signaled(int index, void *signal); both
+! arguments are passed by value and the result is a C int.
+! NOTE(review): signal is declared c_int64_t here while the C prototype takes
+! a void*. The pointer-like lock/mask arguments elsewhere in this module use
+! c_intptr_t - confirm whether c_intptr_t is intended here as well.
+function offload_signaled(target_number, signal)                       &
+           bind (C, name = "_Offload_signaled")
+!dec$ attributes default :: offload_signaled
+  import :: c_int, c_int64_t
+  integer (kind=c_int) :: offload_signaled
+  integer (kind=c_int), value :: target_number
+  integer (kind=c_int64_t), value :: signal
+!dec$ attributes offload:mic :: offload_signaled
+end function offload_signaled
+
+! Binding to the C routine _Offload_report(int val); val is passed by value.
+subroutine offload_report(val)                                         &
+           bind (C, name = "_Offload_report")
+!dec$ attributes default :: offload_report
+  import :: c_int
+  integer (kind=c_int), value :: val
+!dec$ attributes offload:mic :: offload_report
+end subroutine offload_report
+
+! Binding to the C routine _Offload_get_device_number(); returns a C int.
+function offload_get_device_number()                                   &
+           bind (C, name = "_Offload_get_device_number")
+!dec$ attributes default :: offload_get_device_number
+  import :: c_int
+  integer (kind=c_int)        :: offload_get_device_number
+!dec$ attributes offload:mic :: offload_get_device_number
+end function offload_get_device_number
+
+! Binding to the C routine _Offload_get_physical_device_number(); returns a
+! C int.
+function offload_get_physical_device_number()                          &
+           bind (C, name = "_Offload_get_physical_device_number")
+!dec$ attributes default :: offload_get_physical_device_number
+  import :: c_int
+  integer (kind=c_int)        :: offload_get_physical_device_number
+!dec$ attributes offload:mic :: offload_get_physical_device_number
+end function offload_get_physical_device_number
+
+! OpenMP API wrappers
+!
+! Every interface below binds to the C routine of the same name declared in
+! offload.h. target_type and target_number are declared as c_int and passed
+! by value.
+
+! Set num_threads on target.
+subroutine omp_set_num_threads_target(target_type, target_number, num_threads) &
+    bind(C, name="omp_set_num_threads_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, num_threads
+end subroutine omp_set_num_threads_target
+
+! Get max_threads from target.
+function omp_get_max_threads_target(target_type, target_number) &
+    bind(C, name="omp_get_max_threads_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: omp_get_max_threads_target
+end function omp_get_max_threads_target
+
+! Get num_procs from target.
+function omp_get_num_procs_target(target_type, target_number) &
+    bind(C, name="omp_get_num_procs_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: omp_get_num_procs_target
+end function omp_get_num_procs_target
+
+! Set dynamic on target.
+! NOTE(review): the third dummy is named num_threads although it carries the
+! dynamic setting; the name is kept because keyword callers depend on it.
+subroutine omp_set_dynamic_target(target_type, target_number, num_threads) &
+    bind(C, name="omp_set_dynamic_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, num_threads
+end subroutine omp_set_dynamic_target
+
+! Get dynamic from target.
+function omp_get_dynamic_target(target_type, target_number) &
+    bind(C, name="omp_get_dynamic_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: omp_get_dynamic_target
+end function omp_get_dynamic_target
+
+! Set nested on target.
+subroutine omp_set_nested_target(target_type, target_number, nested) &
+    bind(C, name="omp_set_nested_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, nested
+end subroutine omp_set_nested_target
+
+! Get nested from target.
+function omp_get_nested_target(target_type, target_number) &
+    bind(C, name="omp_get_nested_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: omp_get_nested_target
+end function omp_get_nested_target
+
+! Schedule setter: kind and modifier are c_int values.
+subroutine omp_set_schedule_target(target_type, target_number, kind, modifier) &
+    bind(C, name="omp_set_schedule_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, kind, modifier
+end subroutine omp_set_schedule_target
+
+! Schedule getter: the C prototype takes pointers (omp_sched_t *kind,
+! int *modifier); here both are address-sized integers passed by value.
+subroutine omp_get_schedule_target(target_type, target_number, kind, modifier) &
+    bind(C, name="omp_get_schedule_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: kind, modifier
+end subroutine omp_get_schedule_target
+
+! lock API functions
+!
+! Each interface binds to the C routine of the same name in offload.h. The C
+! prototypes take an omp_lock_target_t* as their last argument; here the lock
+! is declared as an address-sized integer (c_intptr_t) passed by value.
+
+! Binding to C omp_init_lock_target.
+subroutine omp_init_lock_target (target_type,                          &
+                                 target_number,                        &
+                                 lock)                                 &
+           bind (C, name = "omp_init_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_init_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_init_lock_target
+
+! Binding to C omp_destroy_lock_target.
+subroutine omp_destroy_lock_target (target_type,                       &
+                                    target_number,                     &
+                                    lock)                              &
+           bind (C, name = "omp_destroy_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_destroy_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_destroy_lock_target
+
+! Binding to C omp_set_lock_target.
+subroutine omp_set_lock_target (target_type,                           &
+                                target_number,                         &
+                                lock)                                  &
+           bind (C, name = "omp_set_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_set_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_set_lock_target
+
+! Binding to C omp_unset_lock_target.
+subroutine omp_unset_lock_target (target_type,                         &
+                                  target_number,                       &
+                                  lock)                                &
+           bind (C, name = "omp_unset_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_unset_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_unset_lock_target
+
+! Binding to C omp_test_lock_target; returns a C int.
+function omp_test_lock_target (target_type,                            &
+                               target_number,                          &
+                               lock)                                   &
+           bind (C, name = "omp_test_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: omp_test_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end function omp_test_lock_target
+
+! nested lock API functions
+!
+! Bindings to the same-named C routines in offload.h. The C side takes an
+! omp_nest_lock_target_t*; here the lock is an address-sized integer
+! (c_intptr_t) passed by value.
+
+subroutine omp_init_nest_lock_target(target_type, target_number, lock) &
+    bind(C, name="omp_init_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: lock
+end subroutine omp_init_nest_lock_target
+
+subroutine omp_destroy_nest_lock_target(target_type, target_number, lock) &
+    bind(C, name="omp_destroy_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: lock
+end subroutine omp_destroy_nest_lock_target
+
+subroutine omp_set_nest_lock_target(target_type, target_number, lock) &
+    bind(C, name="omp_set_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: lock
+end subroutine omp_set_nest_lock_target
+
+subroutine omp_unset_nest_lock_target(target_type, target_number, lock) &
+    bind(C, name="omp_unset_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: lock
+end subroutine omp_unset_nest_lock_target
+
+! Returns a C int.
+function omp_test_nest_lock_target(target_type, target_number, lock) &
+    bind(C, name="omp_test_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: lock
+  integer(c_int)             :: omp_test_nest_lock_target
+end function omp_test_nest_lock_target
+
+! kmp API functions
+
+! Binding to C kmp_set_stacksize_target; size is a c_int passed by value.
+subroutine kmp_set_stacksize_target(target_type, target_number, size) &
+    bind(C, name="kmp_set_stacksize_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, size
+end subroutine kmp_set_stacksize_target
+
+! Binding to C kmp_get_stacksize_target; returns a C int.
+function kmp_get_stacksize_target(target_type, target_number) &
+    bind(C, name="kmp_get_stacksize_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: kmp_get_stacksize_target
+end function kmp_get_stacksize_target
+
+! Binding to C kmp_set_stacksize_s_target.
+! NOTE(review): size is declared c_int here, but the C prototype in offload.h
+! declares it as size_t. The kinds differ wherever size_t is wider than int;
+! switching to c_size_t would change the interface seen by existing callers,
+! so confirm before changing.
+subroutine kmp_set_stacksize_s_target (target_type,                    &
+                                       target_number,                  &
+                                       size)                           &
+           bind (C, name = "kmp_set_stacksize_s_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, size
+end subroutine kmp_set_stacksize_s_target
+
+! Binding to C kmp_get_stacksize_s_target.
+! NOTE(review): the result is declared c_int here, but the C prototype in
+! offload.h returns size_t - same kind mismatch as the setter above.
+function kmp_get_stacksize_s_target (target_type,                      &
+                                     target_number)                    &
+         bind (C, name = "kmp_get_stacksize_s_target")
+  import :: c_int
+  integer (kind=c_int)        :: kmp_get_stacksize_s_target
+  integer (kind=c_int), value :: target_type, target_number
+end function kmp_get_stacksize_s_target
+
+! Binding to C kmp_set_blocktime_target; time is a c_int passed by value.
+subroutine kmp_set_blocktime_target(target_type, target_number, time) &
+    bind(C, name="kmp_set_blocktime_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, time
+end subroutine kmp_set_blocktime_target
+
+! Binding to C kmp_get_blocktime_target; returns a C int.
+function kmp_get_blocktime_target(target_type, target_number) &
+    bind(C, name="kmp_get_blocktime_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: kmp_get_blocktime_target
+end function kmp_get_blocktime_target
+
+! Library-mode setters; the three fixed-mode variants take only the target
+! type and number.
+subroutine kmp_set_library_serial_target(target_type, target_number) &
+    bind(C, name="kmp_set_library_serial_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+end subroutine kmp_set_library_serial_target
+
+subroutine kmp_set_library_turnaround_target(target_type, target_number) &
+    bind(C, name="kmp_set_library_turnaround_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+end subroutine kmp_set_library_turnaround_target
+
+subroutine kmp_set_library_throughput_target(target_type, target_number) &
+    bind(C, name="kmp_set_library_throughput_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+end subroutine kmp_set_library_throughput_target
+
+! Binding to C kmp_set_library_target; mode is a c_int passed by value.
+subroutine kmp_set_library_target(target_type, target_number, mode) &
+    bind(C, name="kmp_set_library_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number, mode
+end subroutine kmp_set_library_target
+
+! Binding to C kmp_get_library_target; returns a C int.
+function kmp_get_library_target(target_type, target_number) &
+    bind(C, name="kmp_get_library_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: kmp_get_library_target
+end function kmp_get_library_target
+
+! Binding to C kmp_set_defaults_target; defaults is an assumed-size array of
+! C characters (char const * on the C side).
+subroutine kmp_set_defaults_target(target_type, target_number, defaults) &
+    bind(C, name="kmp_set_defaults_target")
+  import :: c_int, c_char
+  integer(c_int), value                :: target_type, target_number
+  character(kind=c_char), dimension(*) :: defaults
+end subroutine kmp_set_defaults_target
+
+! affinity API functions
+!
+! Bindings to the same-named C routines. Where offload.h shows the prototype
+! (create/destroy), the mask is a kmp_affinity_mask_target_t*; in this module
+! every mask argument is an address-sized integer (c_intptr_t) passed by value.
+
+subroutine kmp_create_affinity_mask_target(target_type, target_number, mask) &
+    bind(C, name="kmp_create_affinity_mask_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: mask
+end subroutine kmp_create_affinity_mask_target
+
+subroutine kmp_destroy_affinity_mask_target(target_type, target_number, mask) &
+    bind(C, name="kmp_destroy_affinity_mask_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: mask
+end subroutine kmp_destroy_affinity_mask_target
+
+! The remaining routines are functions returning a C int.
+function kmp_set_affinity_target(target_type, target_number, mask) &
+    bind(C, name="kmp_set_affinity_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: mask
+  integer(c_int)             :: kmp_set_affinity_target
+end function kmp_set_affinity_target
+
+function kmp_get_affinity_target(target_type, target_number, mask) &
+    bind(C, name="kmp_get_affinity_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number
+  integer(c_intptr_t), value :: mask
+  integer(c_int)             :: kmp_get_affinity_target
+end function kmp_get_affinity_target
+
+function kmp_get_affinity_max_proc_target(target_type, target_number) &
+    bind(C, name="kmp_get_affinity_max_proc_target")
+  import :: c_int
+  integer(c_int), value :: target_type, target_number
+  integer(c_int)        :: kmp_get_affinity_max_proc_target
+end function kmp_get_affinity_max_proc_target
+
+! The *_mask_proc routines additionally take proc as a c_int by value.
+function kmp_set_affinity_mask_proc_target(target_type, target_number, proc, mask) &
+    bind(C, name="kmp_set_affinity_mask_proc_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number, proc
+  integer(c_intptr_t), value :: mask
+  integer(c_int)             :: kmp_set_affinity_mask_proc_target
+end function kmp_set_affinity_mask_proc_target
+
+function kmp_unset_affinity_mask_proc_target(target_type, target_number, proc, mask) &
+    bind(C, name="kmp_unset_affinity_mask_proc_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number, proc
+  integer(c_intptr_t), value :: mask
+  integer(c_int)             :: kmp_unset_affinity_mask_proc_target
+end function kmp_unset_affinity_mask_proc_target
+
+function kmp_get_affinity_mask_proc_target(target_type, target_number, proc, mask) &
+    bind(C, name="kmp_get_affinity_mask_proc_target")
+  import :: c_int, c_intptr_t
+  integer(c_int),      value :: target_type, target_number, proc
+  integer(c_intptr_t), value :: mask
+  integer(c_int)             :: kmp_get_affinity_mask_proc_target
+end function kmp_get_affinity_mask_proc_target
+
+end interface
+end module mic_lib

Added: openmp/trunk/offload/src/offload.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload.h (added)
+++ openmp/trunk/offload/src/offload.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,474 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ * Include file for Offload API.
+ */
+
+#ifndef OFFLOAD_H_INCLUDED
+#define OFFLOAD_H_INCLUDED
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <bits/functexcept.h>
+#endif
+
+#include <stddef.h>
+#include <omp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Expands to the __declspec(target(mic)) attribute. */
+#define TARGET_ATTRIBUTE __declspec(target(mic))
+
+/*
+ *  The target architecture. Values are written out explicitly; they are the
+ *  same ones the compiler would assign implicitly.
+ */
+typedef enum TARGET_TYPE {
+    TARGET_NONE = 0,    /* Undefined target */
+    TARGET_HOST = 1,    /* Host used as target */
+    TARGET_MIC  = 2     /* MIC target */
+} TARGET_TYPE;
+
+/* The default target type. */
+#define DEFAULT_TARGET_TYPE TARGET_MIC
+
+/* The default target number. */
+#define DEFAULT_TARGET_NUMBER 0
+
+/*
+ *  Offload status codes. Values are written out explicitly; they are the same
+ *  ones the compiler would assign implicitly.
+ */
+typedef enum {
+    OFFLOAD_SUCCESS       = 0,
+    OFFLOAD_DISABLED      = 1,      /* offload is disabled */
+    OFFLOAD_UNAVAILABLE   = 2,      /* card is not available */
+    OFFLOAD_OUT_OF_MEMORY = 3,      /* not enough memory on device */
+    OFFLOAD_PROCESS_DIED  = 4,      /* target process has died */
+    OFFLOAD_ERROR         = 5       /* unspecified error */
+} _Offload_result;
+
+/* Status record of an offload. */
+typedef struct {
+    _Offload_result result;         /* result, see above */
+    int             device_number;  /* device number */
+    size_t          data_sent;      /* number of bytes sent to the target */
+    size_t          data_received;  /* number of bytes received by host */
+} _Offload_status;
+
+/* Resets only the result field of x to OFFLOAD_DISABLED. */
+#define OFFLOAD_STATUS_INIT(x) \
+    ((x).result = OFFLOAD_DISABLED)
+
+/* Aggregate initializer, in field order: result, device_number, data_sent,
+   data_received. */
+#define OFFLOAD_STATUS_INITIALIZER \
+    { OFFLOAD_DISABLED, -1, 0, 0 }
+
+/* Offload runtime interfaces */
+
+/* Device queries: no arguments, int result. */
+extern int _Offload_number_of_devices(void);
+extern int _Offload_get_device_number(void);
+extern int _Offload_get_physical_device_number(void);
+
+/* Shared-memory allocation pair. */
+extern void *_Offload_shared_malloc(size_t size);
+extern void  _Offload_shared_free(void *ptr);
+
+/* Aligned variant of the shared-memory allocation pair. */
+extern void *_Offload_shared_aligned_malloc(size_t size, size_t align);
+extern void  _Offload_shared_aligned_free(void *ptr);
+
+/* Signal query and reporting control. */
+extern int  _Offload_signaled(int index, void *signal);
+extern void _Offload_report(int val);
+
+/* OpenMP API */
+
+extern void omp_set_default_device(int num);
+extern int  omp_get_default_device(void);
+extern int  omp_get_num_devices(void);
+
+/* OpenMP API wrappers */
+
+/*
+ * Every wrapper takes the target type and the target number as its first two
+ * arguments. Each prototype is declared exactly once.
+ */
+
+/* Set num_threads on target */
+extern void omp_set_num_threads_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+);
+
+/* Get max_threads from target */
+extern int omp_get_max_threads_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Get num_procs from target */
+extern int omp_get_num_procs_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Set dynamic on target */
+extern void omp_set_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int dynamic
+);
+
+/* Get dynamic from target */
+extern int omp_get_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Set nested on target */
+extern void omp_set_nested_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int nested
+);
+
+/* Get nested from target */
+extern int omp_get_nested_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Set schedule on target */
+extern void omp_set_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t kind,
+    int modifier
+);
+
+/* Get schedule from target; kind and modifier are pointer arguments
+   (presumably filled in by the call - confirm against the implementation) */
+extern void omp_get_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t *kind,
+    int *modifier
+);
+
+/* lock API functions */
+
+/* Wrapper struct holding a single omp_lock_t. */
+typedef struct {
+    omp_lock_t lock;
+} omp_lock_target_t;
+
+extern void omp_init_lock_target(TARGET_TYPE target_type, int target_number,
+                                 omp_lock_target_t *lock);
+
+extern void omp_destroy_lock_target(TARGET_TYPE target_type, int target_number,
+                                    omp_lock_target_t *lock);
+
+extern void omp_set_lock_target(TARGET_TYPE target_type, int target_number,
+                                omp_lock_target_t *lock);
+
+extern void omp_unset_lock_target(TARGET_TYPE target_type, int target_number,
+                                  omp_lock_target_t *lock);
+
+/* Unlike the routines above, this one returns an int. */
+extern int omp_test_lock_target(TARGET_TYPE target_type, int target_number,
+                                omp_lock_target_t *lock);
+
+/* nested lock API functions */
+
+/* Wrapper struct holding a single omp_nest_lock_t. */
+typedef struct {
+    omp_nest_lock_t lock;
+} omp_nest_lock_target_t;
+
+extern void omp_init_nest_lock_target(TARGET_TYPE target_type,
+                                      int target_number,
+                                      omp_nest_lock_target_t *lock);
+
+extern void omp_destroy_nest_lock_target(TARGET_TYPE target_type,
+                                         int target_number,
+                                         omp_nest_lock_target_t *lock);
+
+extern void omp_set_nest_lock_target(TARGET_TYPE target_type,
+                                     int target_number,
+                                     omp_nest_lock_target_t *lock);
+
+extern void omp_unset_nest_lock_target(TARGET_TYPE target_type,
+                                       int target_number,
+                                       omp_nest_lock_target_t *lock);
+
+/* Unlike the routines above, this one returns an int. */
+extern int omp_test_nest_lock_target(TARGET_TYPE target_type,
+                                     int target_number,
+                                     omp_nest_lock_target_t *lock);
+
+/* kmp API functions */
+
+/* NOTE(review): the kmp_*_target declarations below look like per-target
+   counterparts of the kmp_* runtime extensions; units and accepted values
+   are not visible in this header -- confirm against the implementation. */
+
+/* Stack size passed as int; see kmp_set_stacksize_s_target for the size_t
+   variant. */
+extern void kmp_set_stacksize_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int size
+);
+
+/* int-returning getter paired with kmp_set_stacksize_target. */
+extern int kmp_get_stacksize_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Same shape as kmp_set_stacksize_target, but the size is a size_t. */
+extern void kmp_set_stacksize_s_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    size_t size
+);
+
+/* size_t-returning getter paired with kmp_set_stacksize_s_target. */
+extern size_t kmp_get_stacksize_s_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Takes an int named time; the unit is not stated here. */
+extern void kmp_set_blocktime_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int time
+);
+
+/* int-returning getter paired with kmp_set_blocktime_target. */
+extern int kmp_get_blocktime_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* The next three setters take no value argument; the selected library mode
+   is presumably implied by the function name -- confirm. */
+extern void kmp_set_library_serial_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_library_turnaround_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_library_throughput_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* General form taking the mode as an int; valid mode values are not
+   defined in the visible part of this header. */
+extern void kmp_set_library_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int mode
+);
+
+/* int-returning getter paired with kmp_set_library_target. */
+extern int kmp_get_library_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* Takes a read-only C string of defaults; its format is not described
+   here. */
+extern void kmp_set_defaults_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    char const *defaults
+);
+
+/* affinity API functions */
+
+/* Wrapper holding a single kmp_affinity_mask_t. This is the mask type
+   accepted by the kmp_*_affinity*_target routines declared below. */
+typedef struct {
+    kmp_affinity_mask_t mask;
+} kmp_affinity_mask_target_t;
+
+/* NOTE(review): the routines below presumably mirror the kmp_*affinity*
+   runtime extensions on the selected target; the meaning of the int
+   return values is not stated in this header -- confirm. */
+
+extern void kmp_create_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern void kmp_destroy_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_set_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_get_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+/* Takes no mask; returns an int for the given target. */
+extern int kmp_get_affinity_max_proc_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/* The three *_mask_proc_* routines additionally take an int proc that
+   selects an entry of the mask. */
+extern int kmp_set_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_unset_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_get_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+);
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+/* Namespace for the shared_allocator. */
+namespace __offload {
+  /* This follows the specification for std::allocator. */
+  /* Forward declaration of the class template. */
+  template <typename T>
+  class shared_allocator;
+
+  /* Specialization for shared_allocator<void>.
+     Provides only the typedefs and rebind; it cannot allocate. */
+  template <>
+  class shared_allocator<void> {
+  public:
+    typedef void       *pointer;
+    typedef const void *const_pointer;
+    typedef void        value_type;
+    template <class U> struct rebind { typedef shared_allocator<U> other; };
+  };
+
+  /* Definition of shared_allocator<T>.
+     Stateless allocator whose storage comes from _Offload_shared_malloc
+     and is returned with _Offload_shared_free. */
+  template <class T>
+  class shared_allocator {
+  public:
+    typedef size_t     size_type;
+    typedef ptrdiff_t  difference_type;
+    typedef T         *pointer;
+    typedef const T   *const_pointer;
+    typedef T         &reference;
+    typedef const T   &const_reference;
+    typedef T          value_type;
+    template <class U> struct rebind { typedef shared_allocator<U> other; };
+    /* The allocator has no data members, so construction, copying and
+       destruction are all empty. */
+    shared_allocator() throw() { }
+    shared_allocator(const shared_allocator&) throw() { }
+    template <class U> shared_allocator(const shared_allocator<U>&) throw() { }
+    ~shared_allocator() throw() { }
+    pointer address(reference x) const { return &x; }
+    const_pointer address(const_reference x) const { return &x; }
+    /* Allocate storage for the given number of T objects; the hint is
+       ignored. Defined out of class below. */
+    pointer allocate(
+      size_type, shared_allocator<void>::const_pointer hint = 0);
+    /* Release storage obtained from allocate(); n is ignored. */
+    void deallocate(pointer p, size_type n);
+    /* Largest element count whose byte size still fits in size_type. */
+    size_type max_size() const throw() {
+      return size_type(-1)/sizeof(T);
+    } /* max_size */
+    /* Copy-construct a T from arg in the storage at p (placement new). */
+    void construct(pointer p, const T& arg) {
+      ::new (p) T(arg);
+    } /* construct */
+    /* Run the destructor of the object at p without freeing its storage. */
+    void destroy(pointer p) {
+      p->~T();
+    } /* destroy */
+  };
+
+  /* Definition for allocate.
+     Returns storage for s objects of type T taken from
+     _Offload_shared_malloc. Failure is reported through
+     std::__throw_bad_alloc() in two cases: the request exceeds max_size()
+     (s*sizeof(T) would wrap around and silently under-allocate), or the
+     shared allocation returns a null pointer. */
+  template <class T>
+  typename shared_allocator<T>::pointer
+  shared_allocator<T>::allocate(typename shared_allocator<T>::size_type s,
+                                shared_allocator<void>::const_pointer) {
+    /* Reject counts whose byte size cannot be represented in size_type. */
+    if (s > this->max_size()) std::__throw_bad_alloc();
+    /* Allocate from shared memory. */
+    void *ptr = _Offload_shared_malloc(s*sizeof(T));
+    if (ptr == 0) std::__throw_bad_alloc();
+    return static_cast<pointer>(ptr);
+  } /* allocate */
+
+  /* Definition for deallocate. The element count is not needed. */
+  template <class T>
+  void shared_allocator<T>::deallocate(
+      pointer p,
+      typename shared_allocator<T>::size_type) {
+    /* Free the shared memory. */
+    _Offload_shared_free(p);
+  } /* deallocate */
+
+  /* All shared_allocator instances are interchangeable (they carry no
+     state), so any two compare equal regardless of element type. */
+  template <typename _T1, typename _T2>
+  inline bool operator==(const shared_allocator<_T1> &,
+                         const shared_allocator<_T2> &) throw() {
+    return true;
+  }  /* operator== */
+
+  template <typename _T1, typename _T2>
+  inline bool operator!=(const shared_allocator<_T1> &,
+                         const shared_allocator<_T2> &) throw() {
+    return false;
+  }  /* operator!= */
+}  /* __offload */
+#endif /* __cplusplus */
+
+#endif /* OFFLOAD_H_INCLUDED */

Added: openmp/trunk/offload/src/offload_common.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_common.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_common.cpp (added)
+++ openmp/trunk/offload/src/offload_common.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,170 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
+#include "offload_common.h"
+
+// The debug routines
+
+#if OFFLOAD_DEBUG > 0
+
+// Dump len bytes starting at data as hexadecimal text through
+// OFFLOAD_DEBUG_TRACE at the given trace level.
+//
+// Output is produced only when console_enabled is strictly greater than
+// trace_level. Bytes are printed as two hex digits each, with a space after
+// every 4 bytes and a new trace line after every 64 bytes; a final partial
+// line is flushed at the end.
+//
+// A null data pointer or a non-positive len produces no output.
+void __dump_bytes(
+    int trace_level,
+    const void *data,
+    int len
+)
+{
+    // Nothing to dump. This also keeps a negative len from driving the
+    // "while (len--)" loop below far past the end of data.
+    if (data == 0 || len <= 0) {
+        return;
+    }
+
+    if (console_enabled > trace_level) {
+        const uint8_t *arr = (const uint8_t*) data;
+        // One line holds at most 64 bytes: 128 hex digits, 16 separators
+        // and the terminating NUL, so 4096 is ample.
+        char buffer[4096];
+        char *bufferp;
+        int count = 0;
+
+        bufferp = buffer;
+        while (len--) {
+            sprintf(bufferp, "%02x", *arr++);
+            bufferp += 2;
+            count++;
+            // group the digits four bytes at a time
+            if ((count&3) == 0) {
+                sprintf(bufferp, " ");
+                bufferp++;
+            }
+            // emit a full line and start over at the buffer head
+            if ((count&63) == 0) {
+                OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer);
+                bufferp = buffer;
+                count = 0;
+            }
+        }
+        // flush the trailing partial line, if any
+        if (count) {
+            OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer);
+        }
+    }
+}
+#endif // OFFLOAD_DEBUG
+
+// The Marshaller and associated routines
+
+// Append length bytes from data at the current buffer position, then
+// advance buffer_ptr and tfr_size by length.
+// No check against the buffer bounds is made here; the caller must have
+// sized the buffer passed to init_buffer() accordingly.
+void Marshaller::send_data(
+    const void *data,
+    int64_t length
+)
+{
+    // int64_t is not necessarily long long; cast so that the argument
+    // matches the %lld conversion.
+    OFFLOAD_DEBUG_TRACE(2, "send_data(%p, %lld)\n",
+                        data, (long long) length);
+    memcpy(buffer_ptr, data, (size_t)length);
+    buffer_ptr += length;
+    tfr_size += length;
+}
+
+// Copy length bytes from the current buffer position into data, then
+// advance buffer_ptr and tfr_size by length.
+// No check against the buffer bounds is made here.
+void Marshaller::receive_data(
+    void *data,
+    int64_t length
+)
+{
+    // int64_t is not necessarily long long; cast so that the argument
+    // matches the %lld conversion.
+    OFFLOAD_DEBUG_TRACE(2, "receive_data(%p, %lld)\n",
+                        data, (long long) length);
+    memcpy(data, buffer_ptr, (size_t)length);
+    buffer_ptr += length;
+    tfr_size += length;
+}
+
+// Send function pointer
+// Marshal a function pointer by name: the NUL-terminated name found for
+// data in __offload_funcs is written to the buffer. A null pointer is
+// written as an empty string. An address with no registered name is
+// reported through LIBOFFLOAD_ERROR and terminates the process.
+void Marshaller::send_func_ptr(
+    const void* data
+)
+{
+    // A null pointer travels as just the terminating NUL.
+    const char* func_name = "";
+
+    if (data != 0) {
+        func_name = __offload_funcs.find_name(data);
+        if (func_name == 0) {
+#if OFFLOAD_DEBUG > 0
+            if (console_enabled > 2) {
+                __offload_funcs.dump();
+            }
+#endif // OFFLOAD_DEBUG > 0
+
+            LIBOFFLOAD_ERROR(c_send_func_ptr, data);
+            exit(1);
+        }
+    }
+
+    // Name plus its terminator.
+    const size_t nbytes = strlen(func_name) + 1;
+
+    memcpy(buffer_ptr, func_name, nbytes);
+    buffer_ptr += nbytes;
+    tfr_size += nbytes;
+}
+
+// Receive function pointer
+// Unmarshal a function pointer: read the NUL-terminated name at the
+// current buffer position and store the address found for it in
+// __offload_funcs into *data. An empty name yields a null pointer. A name
+// with no registered address is reported through LIBOFFLOAD_ERROR and
+// terminates the process.
+void Marshaller::receive_func_ptr(
+    const void** data
+)
+{
+    const char* func_name = (const char*) buffer_ptr;
+    // An empty name occupies only its terminator.
+    size_t      nbytes = 1;
+
+    if (func_name[0] == '\0') {
+        *data = 0;
+    }
+    else {
+        *data = __offload_funcs.find_addr(func_name);
+        if (*data == 0) {
+#if OFFLOAD_DEBUG > 0
+            if (console_enabled > 2) {
+                __offload_funcs.dump();
+            }
+#endif // OFFLOAD_DEBUG > 0
+
+            LIBOFFLOAD_ERROR(c_receive_func_ptr, func_name);
+            exit(1);
+        }
+        nbytes = strlen(func_name) + 1;
+    }
+
+    // Step over the name that was just consumed.
+    buffer_ptr += nbytes;
+    tfr_size += nbytes;
+}
+
+// End of the Marshaller and associated routines
+
+// Allocate size bytes aligned to at least align bytes using _mm_malloc.
+// Alignments smaller than sizeof(void*) are raised to sizeof(void*).
+// Never returns NULL: an allocation failure is reported through
+// LIBOFFLOAD_ERROR and terminates the process.
+extern void *OFFLOAD_MALLOC(
+    size_t size,
+    size_t align
+)
+{
+    void *ptr;
+
+    // size_t is not long long; cast so that the arguments match the %lld
+    // conversions (a mismatch is undefined behavior, and visibly wrong on
+    // targets where size_t is 32 bits wide).
+    OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__,
+                        (long long) size, (long long) align);
+
+    if (align < sizeof(void*)) {
+        align = sizeof(void*);
+    }
+
+    ptr = _mm_malloc(size, align);
+    if (ptr == NULL) {
+        LIBOFFLOAD_ERROR(c_offload_malloc, size, align);
+        exit(1);
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, ptr);
+
+    return ptr;
+}

Added: openmp/trunk/offload/src/offload_common.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_common.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_common.h (added)
+++ openmp/trunk/offload/src/offload_common.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,444 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The parts of the runtime library common to host and target
+*/
+
+#ifndef OFFLOAD_COMMON_H_INCLUDED
+#define OFFLOAD_COMMON_H_INCLUDED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+#include "offload.h"
+#include "offload_table.h"
+#include "offload_trace.h"
+#include "offload_timer.h"
+#include "offload_util.h"
+#include "cean_util.h"
+#include "dv_util.h"
+#include "liboffload_error_codes.h"
+
+#include <stdarg.h>
+
+// The debug routines
+
+// Host console and file logging
+// console_enabled is the threshold compared against a message's trace level
+// by OFFLOAD_TRACE below.
+extern int console_enabled;
+extern int offload_report_level;
+
+// True only when the report level is exactly 3.
+#define OFFLOAD_DO_TRACE (offload_report_level == 3)
+
+// String printed in front of every trace line (see the *_PRINT_PREFIX
+// macros below).
+extern const char *prefix;
+extern int offload_number;
+#if !HOST_LIBRARY
+// Declared for non-host builds only; printed after the prefix string.
+extern int mic_index;
+#endif
+
+#if HOST_LIBRARY
+void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
+void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
+void offload_report_free_data(OffloadHostTimerData * timer_data);
+void Offload_Timer_Print(void);
+
+// Atomically increment offload_number and yield the new value, using the
+// GCC builtin or, for TARGET_WINNT, the _InterlockedIncrement intrinsic.
+#ifndef TARGET_WINNT
+#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
+        __sync_add_and_fetch(&offload_number, 1)
+#else
+#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
+        _InterlockedIncrement(reinterpret_cast<long*>(&offload_number))
+#endif
+
+// Both host prefix macros expand to the same printf call. Note that the
+// expansions already end with a semicolon.
+#define OFFLOAD_DEBUG_PRINT_TAG_PREFIX() \
+        printf("%s:  ", prefix);
+
+#define OFFLOAD_DEBUG_PRINT_PREFIX() \
+        printf("%s:  ", prefix);
+#else
+// Non-host variant: the prefix is followed by mic_index.
+#define OFFLOAD_DEBUG_PRINT_PREFIX() \
+        printf("%s%d:  ", prefix, mic_index);
+#endif // HOST_LIBRARY
+
+// Print a prefixed, printf-style message when console_enabled is at least
+// trace_level, then flush all output streams.
+// The expansion is a bare "if (...) { ... }" block rather than a
+// do { } while (0) statement, so take care when using it as the unbraced
+// body of an if that has an else branch.
+#define OFFLOAD_TRACE(trace_level, ...)  \
+    if (console_enabled >= trace_level) { \
+        OFFLOAD_DEBUG_PRINT_PREFIX(); \
+        printf(__VA_ARGS__); \
+        fflush(NULL); \
+    }
+
+#if OFFLOAD_DEBUG > 0
+
+// Debug-only alias of OFFLOAD_TRACE.
+#define OFFLOAD_DEBUG_TRACE(level, ...) \
+    OFFLOAD_TRACE(level, __VA_ARGS__)
+
+// Print a stage report through offload_stage_print when OFFLOAD_DO_TRACE
+// holds; the level argument is not used by the expansion.
+#define OFFLOAD_REPORT(level, offload_number, stage, ...) \
+    if (OFFLOAD_DO_TRACE) { \
+        offload_stage_print(stage, offload_number, __VA_ARGS__); \
+        fflush(NULL); \
+    }
+
+// Print a stage report when OFFLOAD_DO_TRACE holds, otherwise fall back to
+// a plain OFFLOAD_TRACE message at the given level.
+#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...) \
+    if (OFFLOAD_DO_TRACE) { \
+        offload_stage_print(stage, offload_number, __VA_ARGS__); \
+        fflush(NULL); \
+    } \
+    if (!OFFLOAD_DO_TRACE) { \
+        OFFLOAD_TRACE(level, __VA_ARGS__) \
+    }
+
+#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
+    __dump_bytes(level, a, b)
+
+extern void __dump_bytes(
+    int level,
+    const void *data,
+    int len
+);
+
+#else
+
+// With debugging disabled every macro of the debug branch must still be
+// defined, otherwise code that uses them unconditionally (for example
+// Marshaller::send_data) does not compile. They all expand to nothing.
+// NOTE(review): confirm that OFFLOAD_REPORT and OFFLOAD_DEBUG_TRACE_1 are
+// meant to vanish entirely in non-debug builds.
+#define OFFLOAD_DEBUG_TRACE(level, ...)
+#define OFFLOAD_REPORT(level, offload_number, stage, ...)
+#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...)
+#define OFFLOAD_DEBUG_LOG(level, ...)
+#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b)
+
+#endif
+
+// Runtime interface
+
+// Build a runtime symbol name by pasting a onto "__offload_".
+#define OFFLOAD_PREFIX(a) __offload_##a
+
+// OFFLOAD_MALLOC names the function __offload_malloc; OFFLOAD_FREE maps
+// directly onto _mm_free.
+#define OFFLOAD_MALLOC            OFFLOAD_PREFIX(malloc)
+#define OFFLOAD_FREE(a)           _mm_free(a)
+
+// Forward functions
+
+// Aligned allocation; defined in offload_common.cpp.
+extern void *OFFLOAD_MALLOC(size_t size, size_t align);
+
+// The Marshaller
+
+//! Indicator for the type of entry on an offload item list.
+/*! Values start at 1 and increase by one in declaration order, so the
+    last enumerator, c_string_ptr_array, has the value 17. */
+enum OffloadItemType {
+    c_data =   1,       //!< Plain data
+    c_data_ptr,         //!< Pointer data
+    c_func_ptr,         //!< Function pointer
+    c_void_ptr,         //!< void*
+    c_string_ptr,       //!< C string
+    c_dv,               //!< Dope vector variable
+    c_dv_data,          //!< Dope-vector data
+    c_dv_data_slice,    //!< Dope-vector data's slice
+    c_dv_ptr,           //!< Dope-vector variable pointer
+    c_dv_ptr_data,      //!< Dope-vector pointer data
+    c_dv_ptr_data_slice,//!< Dope-vector pointer data's slice
+    c_cean_var,         //!< CEAN variable
+    c_cean_var_ptr,     //!< Pointer to CEAN variable
+    c_data_ptr_array,   //!< Pointer to data pointer array
+    c_func_ptr_array,   //!< Pointer to function pointer array
+    c_void_ptr_array,   //!< Pointer to void* pointer array
+    c_string_ptr_array  //!< Pointer to char* pointer array
+};
+
+// Classification helpers for OffloadItemType values. Each macro evaluates
+// its argument more than once, so pass an expression without side effects.
+
+//! True for c_string_ptr, c_data_ptr, c_cean_var_ptr and c_dv_ptr.
+#define VAR_TYPE_IS_PTR(t) ((t) == c_string_ptr || \
+                            (t) == c_data_ptr || \
+                            (t) == c_cean_var_ptr || \
+                            (t) == c_dv_ptr)
+
+//! True for c_data, c_void_ptr, c_cean_var and c_dv.
+#define VAR_TYPE_IS_SCALAR(t) ((t) == c_data || \
+                               (t) == c_void_ptr || \
+                               (t) == c_cean_var || \
+                               (t) == c_dv)
+
+//! True for c_dv_data and c_dv_ptr_data.
+#define VAR_TYPE_IS_DV_DATA(t) ((t) == c_dv_data || \
+                                (t) == c_dv_ptr_data)
+
+//! True for c_dv_data_slice and c_dv_ptr_data_slice.
+#define VAR_TYPE_IS_DV_DATA_SLICE(t) ((t) == c_dv_data_slice || \
+                                      (t) == c_dv_ptr_data_slice)
+
+
+//! Specify direction to copy offloaded variable.
+/*! Values start at -1 and increase by one in declaration order, so
+    c_parameter_nocopy is 0 and c_parameter_inout is 3. */
+enum OffloadParameterType {
+    c_parameter_unknown = -1, //!< Unknown clause
+    c_parameter_nocopy,       //!< Variable listed in "nocopy" clause
+    c_parameter_in,           //!< Variable listed in "in" clause
+    c_parameter_out,          //!< Variable listed in "out" clause
+    c_parameter_inout         //!< Variable listed in "inout" clause
+};
+
+//! An Offload Variable descriptor
+/*! Several members are documented below as set or unused "by compiler",
+    so the field order and widths appear to be shared with
+    compiler-generated code; do not reorder or resize members without
+    confirming that. The unions use anonymous structs so the bit-fields can
+    be read individually or all at once through the "bits" member. */
+struct VarDesc {
+    //! OffloadItemTypes of source and destination
+    /*! NOTE(review): each field is 4 bits wide, while OffloadItemType has
+        enumerators above 15 (c_void_ptr_array, c_string_ptr_array);
+        presumably those are never stored here -- confirm. */
+    union {
+        struct {
+            uint8_t dst : 4; //!< OffloadItemType of destination
+            uint8_t src : 4; //!< OffloadItemType of source
+        };
+        uint8_t bits;
+    } type;
+
+    //! OffloadParameterType that describes direction of data transfer
+    union {
+        struct {
+            uint8_t in  : 1; //!< Set if IN or INOUT
+            uint8_t out : 1; //!< Set if OUT or INOUT
+        };
+        uint8_t bits;
+    } direction;
+
+    uint8_t alloc_if;        //!< alloc_if modifier value
+    uint8_t free_if;         //!< free_if modifier value
+    uint32_t align;          //!< MIC alignment requested for pointer data
+    //! Not used by compiler; set to 0
+    /*! Used by runtime as offset to data from start of MIC buffer */
+    uint32_t mic_offset;
+    //! Flags describing this variable
+    union {
+        struct {
+            //! source variable has persistent storage
+            uint32_t is_static : 1;
+            //! destination variable has persistent storage
+            uint32_t is_static_dstn : 1;
+            //! has length for c_dv && c_dv_ptr
+            uint32_t has_length : 1;
+            //! persisted local scalar is in stack buffer
+            uint32_t is_stack_buf : 1;
+            //! buffer address is sent in data
+            uint32_t sink_addr : 1;
+            //! alloc displacement is sent in data
+            uint32_t alloc_disp : 1;
+            //! source data is noncontiguous
+            uint32_t is_noncont_src : 1;
+            //! destination data is noncontiguous
+            uint32_t is_noncont_dst : 1;
+        };
+        uint32_t bits;
+    } flags;
+    //! Not used by compiler; set to 0
+    /*! Used by runtime as offset to base from data stored in a buffer */
+    int64_t offset;
+    //! Element byte-size of data to be transferred
+    /*! For dope-vector, the size of the dope-vector      */
+    int64_t size;
+    union {
+        //! Set to 0 for array expressions and dope-vectors
+        /*! Set to 1 for scalars                          */
+        /*! Set to value of length modifier for pointers  */
+        int64_t count;
+        //! Displacement not used by compiler
+        int64_t disp;
+    };
+
+    //! This field not used by OpenMP 4.0
+    /*! The alloc section expression in #pragma offload   */
+    union {
+       void *alloc;
+       int64_t ptr_arr_offset;
+    };
+
+    //! This field not used by OpenMP 4.0
+    /*! The into section expression in #pragma offload    */
+    /*! For c_data_ptr_array this is the into ptr array   */
+    void *into;
+
+    //! For an ordinary variable, address of the variable
+    /*! For c_cean_var (C/C++ array expression),
+        pointer to arr_desc, which is an array descriptor. */
+    /*! For c_data_ptr_array (array of data pointers),
+        pointer to ptr_array_descriptor,
+        which is a descriptor for pointer array transfers. */
+    void *ptr;
+};
+
+//! Auxiliary struct used when -g is enabled that holds variable names
+/*! Both members are read-only C strings. NOTE(review): how a VarDesc2 is
+    associated with its VarDesc is not visible here -- presumably by
+    position in parallel arrays; confirm against the users. */
+struct VarDesc2 {
+    const char *sname; //!< Source name
+    const char *dname; //!< Destination name (when "into" is used)
+};
+
+/*! When the OffloadItemType is c_data_ptr_array
+    the ptr field of the main descriptor points to this struct.          */
+/*! The type in VarDesc1 merely says c_cean_data_ptr, but the pointer
+    type can be c_data_ptr, c_func_ptr, c_void_ptr, or c_string_ptr.
+    Therefore the actual pointer type is in the flags field of VarDesc3. */
+/*! NOTE(review): no type named VarDesc1 and no enumerator named
+    c_cean_data_ptr are declared in this header; presumably VarDesc and
+    c_data_ptr_array are meant, and the "flags field" is array_fields --
+    confirm.                                                             */
+/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
+    is 0 then alignment/alloc_if/free_if are specified in VarDesc1.      */
+/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
+    is 1 then align_array/alloc_if_array/free_if_array specify
+    the set of alignment/alloc_if/free_if values.                        */
+/*! For the other fields, if neither the scalar nor the array flag
+    is set, then that modifier was not specified. If the bits are set
+    they specify which modifier was set and whether it was a
+    scalar or an array expression.                                       */
+struct VarDesc3
+{
+    void *ptr_array;        //!< Pointer to arr_desc of array of pointers
+    void *align_array;      //!< Scalar value or pointer to arr_desc
+    void *alloc_if_array;   //!< Scalar value or pointer to arr_desc
+    void *free_if_array;    //!< Scalar value or pointer to arr_desc
+    void *extent_start;     //!< Scalar value or pointer to arr_desc
+    void *extent_elements;  //!< Scalar value or pointer to arr_desc
+    void *into_start;       //!< Scalar value or pointer to arr_desc
+    void *into_elements;    //!< Scalar value or pointer to arr_desc
+    void *alloc_start;      //!< Scalar value or pointer to arr_desc
+    void *alloc_elements;   //!< Scalar value or pointer to arr_desc
+    /*! Flags that describe the pointer type and whether each field
+        is a scalar value or an array expression.        */
+    /*! First 6 bits are pointer array element type:
+        c_data_ptr, c_func_ptr, c_void_ptr, c_string_ptr */
+    /*! Then single bits specify:                        */
+    /*!     align_array is an array                      */
+    /*!     alloc_if_array is an array                   */
+    /*!     free_if_array is an array                    */
+    /*!     extent_start is a scalar expression          */
+    /*!     extent_start is an array expression          */
+    /*!     extent_elements is a scalar expression       */
+    /*!     extent_elements is an array expression       */
+    /*!     into_start is a scalar expression            */
+    /*!     into_start is an array expression            */
+    /*!     into_elements is a scalar expression         */
+    /*!     into_elements is an array expression         */
+    /*!     alloc_start is a scalar expression           */
+    /*!     alloc_start is an array expression           */
+    /*!     alloc_elements is a scalar expression        */
+    /*!     alloc_elements is an array expression        */
+    uint32_t array_fields;
+};
+// The constants below run from 6 to 20 in the same order as the single
+// bits listed in the array_fields comment above.
+// NOTE(review): they are presumably bit positions within
+// VarDesc3::array_fields (bits 0-5 holding the element type), not masks --
+// confirm against the code that tests them.
+const int flag_align_is_array = 6;
+const int flag_alloc_if_is_array = 7;
+const int flag_free_if_is_array = 8;
+const int flag_extent_start_is_scalar = 9;
+const int flag_extent_start_is_array = 10;
+const int flag_extent_elements_is_scalar = 11;
+const int flag_extent_elements_is_array = 12;
+const int flag_into_start_is_scalar = 13;
+const int flag_into_start_is_array = 14;
+const int flag_into_elements_is_scalar = 15;
+const int flag_into_elements_is_array = 16;
+const int flag_alloc_start_is_scalar = 17;
+const int flag_alloc_start_is_array = 18;
+const int flag_alloc_elements_is_scalar = 19;
+const int flag_alloc_elements_is_array = 20;
+
+// The Marshaller
+// Serializes data and function pointers into, and deserializes them out
+// of, a caller-supplied byte buffer. The buffer is attached with
+// init_buffer(); the send_*/receive_* members then move a cursor through
+// it and keep a running count of the bytes transferred.
+class Marshaller
+{
+private:
+    char *buffer_start;     // start address of buffer
+    char *buffer_ptr;       // current pointer within buffer
+    long long buffer_size;  // physical size of data sent (including flags)
+    long long tfr_size;     // user data sent/received
+
+public:
+    // Start with no buffer attached and nothing transferred.
+    Marshaller() :
+        buffer_start(0), buffer_ptr(0),
+        buffer_size(0), tfr_size(0)
+    {
+    }
+
+    // Count of user data sent/received so far.
+    long long get_tfr_size() const { return tfr_size; }
+
+    // Start address of the attached buffer.
+    char *get_buffer_start() const { return buffer_start; }
+
+    // Size recorded by the last init_buffer() call.
+    long long get_buffer_size() const { return buffer_size; }
+
+    // Attach buffer d of size s and rewind the cursor to its start.
+    // The transfer counter is left untouched.
+    void init_buffer(char *d, long long s)
+    {
+        buffer_start = d;
+        buffer_ptr = d;
+        buffer_size = s;
+    }
+
+    // Copy length bytes from data into the buffer at the cursor.
+    void send_data(const void *data, int64_t length);
+
+    // Copy length bytes from the buffer at the cursor into data.
+    void receive_data(void *data, int64_t length);
+
+    // Write the function pointer data to the buffer.
+    void send_func_ptr(const void* data);
+
+    // Read a function pointer from the buffer into *data.
+    void receive_func_ptr(const void** data);
+};
+
+// End of the Marshaller
+
+// The offloaded function descriptor.
+// Sent from host to target to specify which function to run.
+// Also, sets console and file tracing levels.
+// The struct ends in a flexible array member (data[]), so an instance must
+// be allocated with extra room for the function name; sizeof does not
+// include it.
+struct FunctionDescriptor
+{
+    // Input data size.
+    long long in_datalen;
+
+    // Output data size.
+    long long out_datalen;
+
+    // Whether trace is requested on console.
+    // A value of 1 produces only function name and data sent/received.
+    // Values > 1 produce copious trace information.
+    uint8_t console_enabled;
+
+    // Flag controlling timing on the target side.
+    // Values > 0 enable timing on sink.
+    uint8_t timer_enabled;
+
+    // NOTE(review): these two share their names with the globals
+    // offload_report_level and offload_number declared earlier in this
+    // header; presumably they carry the host's values to the target --
+    // confirm.
+    int offload_report_level;
+    int offload_number;
+
+    // number of variable descriptors
+    int vars_num;
+
+    // inout data offset if data is passed as misc/return data
+    // otherwise it should be zero.
+    int data_offset;
+
+    // The name of the offloaded function
+    char data[];
+};
+
+// typedef OFFLOAD.
+// Pointer to OffloadDescriptor.
+// OffloadDescriptor is only forward-declared by this typedef.
+typedef struct OffloadDescriptor *OFFLOAD;
+
+#endif // OFFLOAD_COMMON_H_INCLUDED

Added: openmp/trunk/offload/src/offload_engine.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_engine.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_engine.cpp (added)
+++ openmp/trunk/offload/src/offload_engine.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,531 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_engine.h"
+#include <signal.h>
+#include <errno.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "offload_host.h"
+#include "offload_table.h"
+
+// Names of the target-side entry points. init_process() passes this table
+// to COI::ProcessGetFunctionHandles together with c_funcs_total and
+// m_funcs to obtain the handles.
+// NOTE(review): the order presumably has to match the function-index enum
+// in the Engine class, including the MYO_SUPPORT-conditional entries --
+// confirm when adding or removing a name.
+const char* Engine::m_func_names[Engine::c_funcs_total] =
+{
+    "server_compute",
+#ifdef MYO_SUPPORT
+    "server_myoinit",
+    "server_myofini",
+#endif // MYO_SUPPORT
+    "server_init",
+    "server_var_table_size",
+    "server_var_table_copy"
+};
+
+// Symbolic representation of system signals. Fix for CQ233593
+// The table is indexed by signal number. Entry 0 is the fallback that
+// fini_process() selects for any signal number >= c_signal_max.
+// NOTE(review): the numbers in the comments are presumably the signal
+// numbering of the target OS -- confirm before reusing on another platform.
+const char* Engine::c_signal_names[Engine::c_signal_max] =
+{
+    "Unknown SIGNAL",
+    "SIGHUP",    /*  1, Hangup (POSIX).  */
+    "SIGINT",    /*  2, Interrupt (ANSI).  */
+    "SIGQUIT",   /*  3, Quit (POSIX).  */
+    "SIGILL",    /*  4, Illegal instruction (ANSI).  */
+    "SIGTRAP",   /*  5, Trace trap (POSIX).  */
+    "SIGABRT",   /*  6, Abort (ANSI).  */
+    "SIGBUS",    /*  7, BUS error (4.2 BSD).  */
+    "SIGFPE",    /*  8, Floating-point exception (ANSI).  */
+    "SIGKILL",   /*  9, Kill, unblockable (POSIX).  */
+    "SIGUSR1",   /* 10, User-defined signal 1 (POSIX).  */
+    "SIGSEGV",   /* 11, Segmentation violation (ANSI).  */
+    "SIGUSR2",   /* 12, User-defined signal 2 (POSIX).  */
+    "SIGPIPE",   /* 13, Broken pipe (POSIX).  */
+    "SIGALRM",   /* 14, Alarm clock (POSIX).  */
+    "SIGTERM",   /* 15, Termination (ANSI).  */
+    "SIGSTKFLT", /* 16, Stack fault.  */
+    "SIGCHLD",   /* 17, Child status has changed (POSIX).  */
+    "SIGCONT",   /* 18, Continue (POSIX).  */
+    "SIGSTOP",   /* 19, Stop, unblockable (POSIX).  */
+    "SIGTSTP",   /* 20, Keyboard stop (POSIX).  */
+    "SIGTTIN",   /* 21, Background read from tty (POSIX).  */
+    "SIGTTOU",   /* 22, Background write to tty (POSIX).  */
+    "SIGURG",    /* 23, Urgent condition on socket (4.2 BSD).  */
+    "SIGXCPU",   /* 24, CPU limit exceeded (4.2 BSD).  */
+    "SIGXFSZ",   /* 25, File size limit exceeded (4.2 BSD).  */
+    "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD).  */
+    "SIGPROF",   /* 27, Profiling alarm clock (4.2 BSD).  */
+    "SIGWINCH",  /* 28, Window size change (4.3 BSD, Sun).  */
+    "SIGIO",     /* 29, I/O now possible (4.2 BSD).  */
+    "SIGPWR",    /* 30, Power failure restart (System V).  */
+    "SIGSYS"     /* 31, Bad system call.  */
+};
+
+// Bring the engine to the ready state: start the target process if it has
+// not been started yet, load the pending library images and (re)build the
+// pointer table. Does nothing when m_ready is already set.
+void Engine::init(void)
+{
+    // Already initialized: nothing to do and no need to take the lock.
+    if (m_ready) {
+        return;
+    }
+
+    mutex_locker_t locker(m_lock);
+
+    // Check again now that the lock is held; initialization may have been
+    // completed in the meantime.
+    if (m_ready) {
+        return;
+    }
+
+    // start process if not done yet
+    if (m_process == 0) {
+        init_process();
+    }
+
+    // load pending images
+    load_libraries();
+
+    // and (re)build pointer table
+    init_ptr_data();
+
+    // it is ready now
+    m_ready = true;
+}
+
+// Create the target process for this engine from the in-memory target
+// executable, resolve the handles of the server entry points and run the
+// device-side initialization. When a debugger is attached, its bookkeeping
+// variables are filled in and it is notified as well.
+// Terminates the process if no target executable is available.
+void Engine::init_process(void)
+{
+    // create environment for the target process
+    const char **card_env =
+        (const char**) mic_env_vars.create_environ_for_card(m_index);
+    if (card_env != 0) {
+        for (const char **var = card_env; *var != 0; var++) {
+            OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *var);
+        }
+    }
+
+    // Create execution context in the specified device
+    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
+                        m_physical_index);
+    COIENGINE engine;
+    COIRESULT status =
+        COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
+    check_result(status, c_get_engine_handle, m_index, status);
+
+    // Target executable should be available by the time when we
+    // attempt to initialize the device
+    if (__target_exe == 0) {
+        LIBOFFLOAD_ERROR(c_no_target_exe);
+        exit(1);
+    }
+
+    OFFLOAD_DEBUG_TRACE(2,
+        "Loading target executable \"%s\" from %p, size %lld\n",
+        __target_exe->name, __target_exe->data, __target_exe->size);
+
+    // The host environment is duplicated only when no card-specific
+    // environment was created above.
+    status = COI::ProcessCreateFromMemory(
+        engine,                 // in_Engine
+        __target_exe->name,     // in_pBinaryName
+        __target_exe->data,     // in_pBinaryBuffer
+        __target_exe->size,     // in_BinaryBufferLength,
+        0,                      // in_Argc
+        0,                      // in_ppArgv
+        card_env == 0,          // in_DupEnv
+        card_env,               // in_ppAdditionalEnv
+        mic_proxy_io,           // in_ProxyActive
+        mic_proxy_fs_root,      // in_ProxyfsRoot
+        mic_buffer_size,        // in_BufferSpace
+        mic_library_path,       // in_LibrarySearchPath
+        __target_exe->origin,   // in_FileOfOrigin
+        __target_exe->offset,   // in_FileOfOriginOffset
+        &m_process              // out_pProcess
+    );
+    check_result(status, c_process_create, m_index, status);
+
+    // get function handles
+    status = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
+                                            m_func_names, m_funcs);
+    check_result(status, c_process_get_func_handles, m_index, status);
+
+    // initialize device side
+    pid_t target_pid = init_device();
+
+    // For IDB
+    if (__dbg_is_attached) {
+        // TODO: we have in-memory executable now.
+        // Check with IDB team what should we provide them now?
+        // The name is copied only when it fits; otherwise the previous
+        // contents of __dbg_target_exe_name are kept.
+        if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
+            strcpy(__dbg_target_exe_name, __target_exe->name);
+        }
+        __dbg_target_so_pid = target_pid;
+        __dbg_target_id = m_physical_index;
+        __dbg_target_so_loaded();
+    }
+}
+
+// Destroy the target process, if there is one, and forget its handle.
+// With verbose set, the way the process ended (signal, exit code, or a
+// failure to destroy it) is reported through LIBOFFLOAD_ERROR. An attached
+// debugger is notified after a successful destroy.
+void Engine::fini_process(bool verbose)
+{
+    // nothing to do without a process
+    if (m_process == 0) {
+        return;
+    }
+
+    uint32_t sig;
+    int8_t ret;
+
+    // destroy target process
+    OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
+                        m_index);
+
+    COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
+    // The handle is dropped whether or not the destroy succeeded.
+    m_process = 0;
+
+    if (res != COI_SUCCESS) {
+        if (verbose) {
+            LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
+        }
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
+                        sig, ret);
+    if (verbose) {
+        if (sig == 0) {
+            LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
+        }
+        else {
+            // Signal numbers outside the table map to its entry 0.
+            LIBOFFLOAD_ERROR(
+                c_mic_process_exit_sig, m_index, sig,
+                c_signal_names[sig >= c_signal_max ? 0 : sig]);
+        }
+    }
+
+    // for idb
+    if (__dbg_is_attached) {
+        __dbg_target_so_unloaded();
+    }
+}
+
+// Loads every image queued in m_images into the device process and then
+// empties the queue. A library that is already present on the device
+// (COI_ALREADY_EXISTS) is not treated as an error.
+void Engine::load_libraries()
+{
+    TargetImageList::iterator image = m_images.begin();
+    while (image != m_images.end()) {
+        OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
+                            image->name, image->data, image->size);
+
+        // the returned library handle is not used afterwards
+        COILIBRARY handle;
+        COIRESULT status = COI::ProcessLoadLibraryFromMemory(
+            m_process,
+            image->data,
+            image->size,
+            image->name,
+            mic_library_path,
+            image->origin,
+            image->offset,
+            COI_LOADLIBRARY_V1_FLAGS,
+            &handle);
+
+        if (!(status == COI_SUCCESS || status == COI_ALREADY_EXISTS)) {
+            check_result(status, c_load_library, m_index, status);
+        }
+
+        ++image;
+    }
+    m_images.clear();
+}
+
+// Strict weak ordering of target table entries by their name string,
+// used to sort the table received from the device.
+static bool target_entry_cmp(
+    const VarList::BufEntry &l,
+    const VarList::BufEntry &r
+)
+{
+    // the name field is reinterpreted as a C string before comparing
+    return strcmp(reinterpret_cast<const char*>(l.name),
+                  reinterpret_cast<const char*>(r.name)) < 0;
+}
+
+// Strict weak ordering of host table entries by their name string.
+static bool host_entry_cmp(
+    const VarTable::Entry *l,
+    const VarTable::Entry *r
+)
+{
+    const int order = strcmp(l->name, r->name);
+    return order < 0;
+}
+
+// Matches the variables registered on the host (__offload_vars) against the
+// variable table reported by the device and records every pair with equal
+// names in m_ptr_set as a static PtrData entry carrying the device address.
+//
+// The device table is fetched in two pipeline calls: the first returns the
+// number of entries and the buffer length, the second copies the table into
+// a COI buffer which is then mapped on the host. Any COI failure is fatal
+// (see check_result).
+void Engine::init_ptr_data(void)
+{
+    COIRESULT res;
+    COIEVENT event;
+
+    // Prepare table of host entries
+    std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
+                                                   __offload_vars.end());
+
+    // no need to do anything further if the host table is empty
+    if (host_table.size() <= 0) {
+        return;
+    }
+
+    // Get var table entries from the target.
+    // First we need to get size for the buffer to copy data
+    struct {
+        int64_t nelems;
+        int64_t length;
+    } params;
+
+    // "params" is passed as the return-value area of the device function
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_var_table_size],
+                                   0, 0, 0,
+                                   0, 0,
+                                   0, 0,
+                                   &params, sizeof(params),
+                                   &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+
+    // the device reported an empty table: nothing to match
+    if (params.length == 0) {
+        return;
+    }
+
+    // create buffer for target entries and copy data to host
+    COIBUFFER buffer;
+    res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
+                            &m_process, &buffer);
+    check_result(res, c_buf_create, m_index, res);
+
+    // the device function fills the buffer; the entry count is sent to it
+    // as misc data
+    COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_var_table_copy],
+                                   1, &buffer, &flags,
+                                   0, 0,
+                                   &params.nelems, sizeof(params.nelems),
+                                   0, 0,
+                                   &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+
+    // patch names in target data
+    // NOTE(review): the buffer is mapped COI_MAP_READ_ONLY, yet the mapped
+    // table is patched and sorted in place below - presumably acceptable
+    // because the changes never need to reach the device; confirm.
+    VarList::BufEntry *target_table;
+    COIMAPINSTANCE map_inst;
+    res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
+                         0, &map_inst,
+                         reinterpret_cast<void**>(&target_table));
+    check_result(res, c_buf_map, res);
+
+    VarList::table_patch_names(target_table, params.nelems);
+
+    // and sort entries
+    // (both tables are ordered by name so that they can be merged linearly)
+    std::sort(target_table, target_table + params.nelems, target_entry_cmp);
+    std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
+
+    // merge host and target entries and enter matching vars map
+    std::vector<const VarTable::Entry*>::const_iterator hi =
+        host_table.begin();
+    std::vector<const VarTable::Entry*>::const_iterator he =
+        host_table.end();
+    const VarList::BufEntry *ti = target_table;
+    const VarList::BufEntry *te = target_table + params.nelems;
+
+    while (hi != he && ti != te) {
+        // NOTE: this "res" (strcmp result) shadows the COIRESULT above, and
+        // is itself shadowed by the insert() result in the branch below.
+        int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name));
+        if (res == 0) {
+            // add matching entry to var map
+            std::pair<PtrSet::iterator, bool> res =
+                m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
+
+            // store address for new entries
+            if (res.second) {
+                // set elements are const; the fields written here do not
+                // take part in PtrData::operator<
+                PtrData *ptr = const_cast<PtrData*>(res.first.operator->());
+                ptr->mic_addr = ti->addr;
+                ptr->is_static = true;
+            }
+
+            hi++;
+            ti++;
+        }
+        else if (res < 0) {
+            // host name sorts first: no device counterpart, skip it
+            hi++;
+        }
+        else {
+            // device name sorts first: no host counterpart, skip it
+            ti++;
+        }
+    }
+
+    // cleanup
+    res = COI::BufferUnmap(map_inst, 0, 0, 0);
+    check_result(res, c_buf_unmap, res);
+
+    res = COI::BufferDestroy(buffer);
+    check_result(res, c_buf_destroy, res);
+}
+
+// Starts the device "compute" function on the calling thread's pipeline.
+//   buffers          - COI buffers handed to the run function
+//   data, data_size  - misc data passed to the device function
+//   ret, ret_size    - area receiving the return value
+//   num_deps, deps   - events the run depends on
+//   event            - receives the completion event
+// Returns the result of COI::PipelineRunFunction without checking it.
+COIRESULT Engine::compute(
+    const std::list<COIBUFFER> &buffers,
+    const void*         data,
+    uint16_t            data_size,
+    void*               ret,
+    uint16_t            ret_size,
+    uint32_t            num_deps,
+    const COIEVENT*     deps,
+    COIEVENT*           event
+) /* const */
+{
+    // both arrays stay null when the list is empty
+    COIBUFFER *bufs = 0;
+    COI_ACCESS_FLAGS *flags = 0;
+
+    // convert buffers list to a pair of stack-allocated arrays
+    int num_bufs = buffers.size();
+    if (num_bufs > 0) {
+        bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
+        flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
+                                           sizeof(COI_ACCESS_FLAGS));
+
+        std::list<COIBUFFER>::const_iterator src = buffers.begin();
+        for (int slot = 0; src != buffers.end(); ++src, ++slot) {
+            bufs[slot] = *src;
+
+            // TODO: this should be fixed
+            flags[slot] = COI_SINK_WRITE;
+        }
+    }
+
+    // start computation
+    return COI::PipelineRunFunction(get_pipeline(),
+                                    m_funcs[c_func_compute],
+                                    num_bufs, bufs, flags,
+                                    num_deps, deps,
+                                    data, data_size,
+                                    ret, ret_size,
+                                    event);
+}
+
+// Runs the device-side init function, passing it the engine indexes and the
+// console / report levels, waits for its completion and returns the pid
+// value that the function sent back.
+pid_t Engine::init_device(void)
+{
+    // layout of the misc data block handed to the device function
+    struct init_data {
+        int  device_index;
+        int  devices_total;
+        int  console_level;
+        int  offload_report_level;
+    };
+
+    OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
+                          "Initializing device with logical index %d "
+                          "and physical index %d\n",
+                           m_index, m_physical_index);
+
+    // setup misc data
+    init_data args;
+    args.device_index = m_index;
+    args.devices_total = mic_engines_total;
+    args.console_level = console_enabled;
+    args.offload_report_level = offload_report_level;
+
+    pid_t device_pid;
+    COIEVENT done;
+    COIRESULT status = COI::PipelineRunFunction(get_pipeline(),
+                                                m_funcs[c_func_init],
+                                                0, 0, 0, 0, 0,
+                                                &args, sizeof(args),
+                                                &device_pid,
+                                                sizeof(device_pid),
+                                                &done);
+    check_result(status, c_pipeline_run_func, m_index, status);
+
+    // block until the device function has finished
+    status = COI::EventWait(1, &done, -1, 1, 0, 0);
+    check_result(status, c_event_wait, status);
+
+    OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", device_pid);
+
+    return device_pid;
+}
+
+// Data associated with each host thread: one COI pipeline slot per engine
+// (indexed by the engine's logical index) and the thread's set of automatic
+// variables.
+struct Thread {
+    // "addr_coipipe_counter" is the pipe counter decremented on destruction
+    Thread(long* addr_coipipe_counter) :
+        m_addr_coipipe_counter(addr_coipipe_counter)
+    {
+        // no pipeline has been created for any engine yet
+        memset(m_pipelines, 0, sizeof(m_pipelines));
+    }
+
+    // NOTE(review): the counter is decremented exactly once here, while
+    // Engine::get_pipeline increments a counter per created pipeline and
+    // Engine::get_auto_vars creates a Thread without incrementing - confirm
+    // that this accounting is intended.
+    ~Thread() {
+#ifndef TARGET_WINNT
+        __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
+#else // TARGET_WINNT
+        _InterlockedDecrement(m_addr_coipipe_counter);
+#endif // TARGET_WINNT
+        // destroy every pipeline this thread created
+        for (int engine = 0; engine < mic_engines_total; ++engine) {
+            COIPIPELINE pipeline = m_pipelines[engine];
+            if (pipeline == 0) {
+                continue;
+            }
+            COI::PipelineDestroy(pipeline);
+        }
+    }
+
+    // pipeline stored for engine "index", 0 if none was stored
+    COIPIPELINE get_pipeline(int index) const {
+        return m_pipelines[index];
+    }
+
+    // remembers the pipeline created for engine "index"
+    void set_pipeline(int index, COIPIPELINE pipeline) {
+        m_pipelines[index] = pipeline;
+    }
+
+    // set of automatic variables owned by this thread
+    AutoSet& get_auto_vars() {
+        return m_auto_vars;
+    }
+
+private:
+    long*       m_addr_coipipe_counter;
+    AutoSet     m_auto_vars;
+    COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
+};
+
+// Returns the COI pipeline that the calling thread uses for this engine,
+// creating the per-thread data and the pipeline itself on first use.
+// Aborts when the engine would exceed COI_PIPELINE_MAX_PIPELINES pipelines;
+// a failure to create the pipeline is fatal (see check_result).
+COIPIPELINE Engine::get_pipeline(void)
+{
+    // lazily create the data attached to the calling thread
+    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
+    if (thread == 0) {
+        thread = new Thread(&m_proc_number);
+        thread_setspecific(mic_thread_key, thread);
+    }
+
+    COIPIPELINE pipeline = thread->get_pipeline(m_index);
+    if (pipeline == 0) {
+        COIRESULT res;
+        int proc_num;
+
+        // Both variants must yield the counter value *after* the increment,
+        // i.e. the number of pipelines including the one being created.
+        // __sync_fetch_and_add returns the previous value, which let
+        // non-Windows builds create one pipeline more than the limit.
+#ifndef TARGET_WINNT
+        proc_num = __sync_add_and_fetch(&m_proc_number, 1);
+#else // TARGET_WINNT
+        proc_num = _InterlockedIncrement(&m_proc_number);
+#endif // TARGET_WINNT
+
+        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
+            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
+            LIBOFFLOAD_ABORT;
+        }
+        // create pipeline for this thread
+        res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
+        check_result(res, c_pipeline_create, m_index, res);
+
+        thread->set_pipeline(m_index, pipeline);
+    }
+    return pipeline;
+}
+
+// Returns the set of automatic variables belonging to the calling thread,
+// creating the per-thread data on first use.
+AutoSet& Engine::get_auto_vars(void)
+{
+    Thread* self = (Thread*) thread_getspecific(mic_thread_key);
+    if (self != 0) {
+        return self->get_auto_vars();
+    }
+
+    // first call on this thread: create and register its data
+    self = new Thread(&m_proc_number);
+    thread_setspecific(mic_thread_key, self);
+    return self->get_auto_vars();
+}
+
+// Destructor for thread data: "data" is the Thread object that was stored
+// with thread_setspecific.
+void Engine::destroy_thread_data(void *data)
+{
+    Thread *thread = static_cast<Thread*>(data);
+    delete thread;
+}

Added: openmp/trunk/offload/src/offload_engine.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_engine.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_engine.h (added)
+++ openmp/trunk/offload/src/offload_engine.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,482 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_ENGINE_H_INCLUDED
+#define OFFLOAD_ENGINE_H_INCLUDED
+
+#include <limits.h>
+
+#include <list>
+#include <set>
+#include <map>
+#include "offload_common.h"
+#include "coi/coi_client.h"
+
+// Address range
+class MemRange {
+public:
+    // empty range located at the null address
+    MemRange() : m_start(0), m_length(0) {}
+    // range of "len" bytes beginning at "addr"
+    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}
+
+    // first byte of the range
+    const void* start() const {
+        return m_start;
+    }
+
+    // one past the last byte of the range
+    const void* end() const {
+        const char *first = static_cast<const char*>(m_start);
+        return first + m_length;
+    }
+
+    // size of the range in bytes
+    uint64_t length() const {
+        return m_length;
+    }
+
+    // returns true if given range overlaps with another one
+    bool overlaps(const MemRange &o) const {
+        // Half-open ranges A and B intersect exactly when each one begins
+        // before the other one ends.
+        if (!(start() < o.end())) {
+            return false;
+        }
+        return end() > o.start();
+    }
+
+    // returns true if given range contains the other range
+    bool contains(const MemRange &o) const {
+        if (!(start() <= o.start())) {
+            return false;
+        }
+        return o.end() <= end();
+    }
+
+private:
+    const void* m_start;
+    uint64_t    m_length;
+};
+
+// Data associated with a pointer variable
+class PtrData {
+public:
+    // Entry for the host range [addr, addr + len); buffers, device address
+    // and offsets start out as zero, the entry is not static and has no
+    // references. Initializers follow the member declaration order.
+    PtrData(const void *addr, uint64_t len) :
+        cpu_addr(addr, len), cpu_buf(0), mic_buf(0),
+        mic_addr(0), alloc_disp(0), mic_offset(0),
+        is_static(false), ref_count(0)
+    {}
+
+    //
+    // Copy constructor: copies every field except alloc_ptr_data_lock,
+    // which is default-constructed in the copy.
+    //
+    PtrData(const PtrData& ptr):
+        cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf), mic_buf(ptr.mic_buf),
+        mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
+        mic_offset(ptr.mic_offset), is_static(ptr.is_static),
+        ref_count(ptr.ref_count)
+    {}
+
+    // Ordering used by the sets that hold PtrData objects.
+    bool operator<(const PtrData &o) const {
+        // Variables are sorted by the CPU start address.
+        // Overlapping memory ranges are considered equal.
+        if (!(cpu_addr.start() < o.cpu_addr.start())) {
+            return false;
+        }
+        return !cpu_addr.overlaps(o.cpu_addr);
+    }
+
+    // Atomically increments the reference count and returns the value it
+    // had before the increment; static entries always report LONG_MAX.
+    long add_reference() {
+        if (is_static) {
+            return LONG_MAX;
+        }
+#ifndef TARGET_WINNT
+        return __sync_fetch_and_add(&ref_count, 1);
+#else // TARGET_WINNT
+        return _InterlockedIncrement(&ref_count) - 1;
+#endif // TARGET_WINNT
+    }
+
+    // Atomically decrements the reference count and returns the new value;
+    // static entries always report LONG_MAX.
+    long remove_reference() {
+        if (is_static) {
+            return LONG_MAX;
+        }
+#ifndef TARGET_WINNT
+        return __sync_sub_and_fetch(&ref_count, 1);
+#else // TARGET_WINNT
+        return _InterlockedDecrement(&ref_count);
+#endif // TARGET_WINNT
+    }
+
+    // Current reference count (plain read); LONG_MAX for static entries.
+    long get_reference() const {
+        return is_static ? LONG_MAX : ref_count;
+    }
+
+public:
+    // CPU address range
+    const MemRange  cpu_addr;
+
+    // CPU and MIC buffers
+    COIBUFFER       cpu_buf;
+    COIBUFFER       mic_buf;
+
+    // placeholder for buffer address on mic
+    uint64_t        mic_addr;
+
+    uint64_t        alloc_disp;
+
+    // additional offset to pointer data on MIC for improving bandwidth for
+    // data which is not 4K aligned
+    uint32_t        mic_offset;
+
+    // if true buffers are created from static memory
+    bool            is_static;
+    mutex_t         alloc_ptr_data_lock;
+
+private:
+    // reference count for the entry
+    long            ref_count;
+};
+
+typedef std::list<PtrData*> PtrDataList;
+
+// Data associated with automatic variable
+class AutoData {
+public:
+    // Entry for the host range [addr, addr + len) with no references yet.
+    AutoData(const void *addr, uint64_t len) :
+        cpu_addr(addr, len), ref_count(0)
+    {}
+
+    // Ordering used by AutoSet.
+    bool operator<(const AutoData &o) const {
+        // Variables are sorted by the CPU start address.
+        // Overlapping memory ranges are considered equal.
+        if (!(cpu_addr.start() < o.cpu_addr.start())) {
+            return false;
+        }
+        return !cpu_addr.overlaps(o.cpu_addr);
+    }
+
+    // Atomically increments the reference count; returns the value it had
+    // before the increment.
+    long add_reference() {
+#ifndef TARGET_WINNT
+        return __sync_fetch_and_add(&ref_count, 1);
+#else // TARGET_WINNT
+        return _InterlockedIncrement(&ref_count) - 1;
+#endif // TARGET_WINNT
+    }
+
+    // Atomically decrements the reference count; returns the new value.
+    long remove_reference() {
+#ifndef TARGET_WINNT
+        return __sync_sub_and_fetch(&ref_count, 1);
+#else // TARGET_WINNT
+        return _InterlockedDecrement(&ref_count);
+#endif // TARGET_WINNT
+    }
+
+    // Current reference count (plain read).
+    long get_reference() const {
+        const long current = ref_count;
+        return current;
+    }
+
+public:
+    // CPU address range
+    const MemRange cpu_addr;
+
+private:
+    // reference count for the entry
+    long ref_count;
+};
+
+// Set of automatic variables
+typedef std::set<AutoData> AutoSet;
+
+// Target image data
+struct TargetImage
+{
+    // Stores the given pointers and sizes as they are; nothing is copied.
+    TargetImage(const char *_name, const void *_data, uint64_t _size,
+                const char *_origin, uint64_t _offset)
+    {
+        name   = _name;
+        data   = _data;
+        size   = _size;
+        origin = _origin;
+        offset = _offset;
+    }
+
+    // library name
+    const char* name;
+
+    // contents and size
+    const void* data;
+    uint64_t    size;
+
+    // file of origin and offset within that file
+    const char* origin;
+    uint64_t    offset;
+};
+
+typedef std::list<TargetImage> TargetImageList;
+
+// Data associated with persistent auto objects
+struct PersistData
+{
+    // addr        - beginning of the stack at CPU (1st key)
+    // routine_num - identifier of the routine invocation at CPU (2nd key)
+    // size        - length given to the PtrData created for the stack
+    //
+    // cpu_stack_addr is set to null here; it used to be left uninitialized.
+    // NOTE(review): stack_ptr_data is allocated with new and never released
+    // by this struct - presumably freed by the code that owns the list;
+    // confirm.
+    PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
+        stack_cpu_addr(addr), routine_id(routine_num),
+        stack_ptr_data(new PtrData(0, size)), cpu_stack_addr(0)
+    {}
+    // 1-st key value - beginning of the stack at CPU
+    const void *   stack_cpu_addr;
+    // 2-nd key value - identifier of routine invocation at CPU
+    uint64_t   routine_id;
+    // corresponding PtrData; only stack_ptr_data->mic_buf is used
+    PtrData * stack_ptr_data;
+    // used to get offset of the variable in stack buffer
+    char * cpu_stack_addr;
+};
+
+typedef std::list<PersistData> PersistDataList;
+
+// class representing a single engine
+struct Engine {
+    friend void __offload_init_library_once(void);
+    friend void __offload_fini_library(void);
+
+#define check_result(res, tag, ...) \
+    { \
+        if (res == COI_PROCESS_DIED) { \
+            fini_process(true); \
+            exit(1); \
+        } \
+        if (res != COI_SUCCESS) { \
+            __liboffload_error_support(tag, __VA_ARGS__); \
+            exit(1); \
+        } \
+    }
+
+    // logical index of the engine, as stored by set_indexes()
+    int get_logical_index() const {
+        return m_index;
+    }
+
+    // physical index of the engine, as stored by set_indexes()
+    int get_physical_index() const {
+        return m_physical_index;
+    }
+
+    // handle of the device process; 0 while no process is running
+    const COIPROCESS& get_process() const {
+        return m_process;
+    }
+
+    // initialize device
+    void init(void);
+
+    // add new library
+    // Queues a copy of "lib" in m_images and clears m_ready so that the
+    // engine is known to have libraries pending; both updates happen under
+    // m_lock.
+    void add_lib(const TargetImage &lib)
+    {
+        m_lock.lock();
+        m_ready = false;
+        m_images.push_back(lib);
+        m_lock.unlock();
+    }
+
+    COIRESULT compute(
+        const std::list<COIBUFFER> &buffers,
+        const void*         data,
+        uint16_t            data_size,
+        void*               ret,
+        uint16_t            ret_size,
+        uint32_t            num_deps,
+        const COIEVENT*     deps,
+        COIEVENT*           event
+    );
+
+#ifdef MYO_SUPPORT
+    // temporary workaround for blocking behavior for myoiLibInit/Fini calls
+
+    // Starts the device-side MYO init function without waiting for it;
+    // "event" receives the completion event.
+    void init_myo(COIEVENT *event) {
+        COIRESULT status =
+            COI::PipelineRunFunction(get_pipeline(),
+                                     m_funcs[c_func_myo_init],
+                                     0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                     event);
+        check_result(status, c_pipeline_run_func, m_index, status);
+    }
+
+    // Starts the device-side MYO fini function without waiting for it;
+    // "event" receives the completion event.
+    void fini_myo(COIEVENT *event) {
+        COIRESULT status =
+            COI::PipelineRunFunction(get_pipeline(),
+                                     m_funcs[c_func_myo_fini],
+                                     0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                     event);
+        check_result(status, c_pipeline_run_func, m_index, status);
+    }
+#endif // MYO_SUPPORT
+
+    //
+    // Memory association table
+    //
+    // Looks up the entry whose CPU range matches "ptr" (a zero-length
+    // range is used as the search key). Returns 0 when there is none.
+    PtrData* find_ptr_data(const void *ptr) {
+        m_ptr_lock.lock();
+        PtrSet::iterator pos = m_ptr_set.find(PtrData(ptr, 0));
+        m_ptr_lock.unlock();
+
+        // set elements are const; callers get a mutable pointer
+        return pos != m_ptr_set.end() ? const_cast<PtrData*>(&*pos) : 0;
+    }
+
+    // Inserts an entry for [ptr, ptr + len) or finds the existing equal
+    // one. "is_new" tells which of the two happened. For a new entry its
+    // alloc_ptr_data_lock is taken before returning.
+    PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
+        m_ptr_lock.lock();
+        std::pair<PtrSet::iterator, bool> inserted =
+            m_ptr_set.insert(PtrData(ptr, len));
+        PtrData* entry = const_cast<PtrData*>(&*inserted.first);
+        m_ptr_lock.unlock();
+
+        is_new = inserted.second;
+        if (!is_new) {
+            return entry;
+        }
+
+        // It's necessary to lock as soon as possible.
+        // unlock must be done at call site of insert_ptr_data at
+        // branch for is_new
+        entry->alloc_ptr_data_lock.lock();
+        return entry;
+    }
+
+    // Erases the entry whose CPU range matches "ptr", if there is one.
+    void remove_ptr_data(const void *ptr) {
+        const PtrData key(ptr, 0);
+
+        m_ptr_lock.lock();
+        m_ptr_set.erase(key);
+        m_ptr_lock.unlock();
+    }
+
+    //
+    // Automatic variables
+    //
+    // Looks up "ptr" in the calling thread's set of automatic variables.
+    // Returns 0 when no entry matches.
+    AutoData* find_auto_data(const void *ptr) {
+        AutoSet &vars = get_auto_vars();
+        AutoSet::iterator pos = vars.find(AutoData(ptr, 0));
+        return pos != vars.end() ? const_cast<AutoData*>(&*pos) : 0;
+    }
+
+    // Inserts an entry for [ptr, ptr + len) into the calling thread's set
+    // and returns it; when an equal entry already exists that one is
+    // returned instead.
+    AutoData* insert_auto_data(const void *ptr, uint64_t len) {
+        AutoSet::iterator pos =
+            get_auto_vars().insert(AutoData(ptr, len)).first;
+        return const_cast<AutoData*>(&*pos);
+    }
+
+    // Erases the entry matching "ptr" from the calling thread's set.
+    void remove_auto_data(const void *ptr) {
+        AutoSet &vars = get_auto_vars();
+        vars.erase(AutoData(ptr, 0));
+    }
+
+    //
+    // Signals
+    //
+    // Associates "desc" with the key "signal" under m_signal_lock; an
+    // existing association for the same key is overwritten.
+    void add_signal(const void *signal, OffloadDescriptor *desc) {
+        m_signal_lock.lock();
+        m_signal_map[signal] = desc;
+        m_signal_lock.unlock();
+    }
+
+    // Returns the descriptor registered for "signal", or 0 if there is
+    // none. With "remove" set the association is erased as well.
+    OffloadDescriptor* find_signal(const void *signal, bool remove) {
+        OffloadDescriptor *found = 0;
+
+        m_signal_lock.lock();
+        SignalMap::iterator entry = m_signal_map.find(signal);
+        if (entry != m_signal_map.end()) {
+            found = entry->second;
+            if (remove) {
+                m_signal_map.erase(entry);
+            }
+        }
+        m_signal_lock.unlock();
+
+        return found;
+    }
+
+    // stop device process
+    void fini_process(bool verbose);
+
+    // list of stacks active at the engine
+    PersistDataList m_persist_list;
+
+private:
+    // Engine with no indexes assigned and no device process; initializers
+    // are listed in member declaration order.
+    Engine() :
+        m_index(-1), m_physical_index(-1),
+        m_proc_number(0),
+        m_process(0),
+        m_ready(false)
+    {}
+
+    // Quietly (verbose == false) stops the device process if one is still
+    // recorded for this engine.
+    ~Engine() {
+        if (m_process != 0) {
+            fini_process(false);
+        }
+    }
+
+    // set indexes
+    void set_indexes(int logical_index, int physical_index) {
+        m_index = logical_index;
+        m_physical_index = physical_index;
+    }
+
+    // start process on device
+    void init_process();
+
+    void load_libraries(void);
+    void init_ptr_data(void);
+
+    // performs library initialization on the device side
+    pid_t init_device(void);
+
+private:
+    // get pipeline associated with a calling thread
+    COIPIPELINE get_pipeline(void);
+
+    // get automatic vars set associated with the calling thread
+    AutoSet& get_auto_vars(void);
+
+    // destructor for thread data
+    static void destroy_thread_data(void *data);
+
+private:
+    typedef std::set<PtrData> PtrSet;
+    typedef std::map<const void*, OffloadDescriptor*> SignalMap;
+
+    // device indexes
+    int         m_index;
+    int         m_physical_index;
+
+    // number of COI pipes created for the engine
+    long        m_proc_number;
+
+    // process handle
+    COIPROCESS  m_process;
+
+    // If false, device either has not been initialized or new libraries
+    // have been added.
+    bool        m_ready;
+    mutex_t     m_lock;
+
+    // List of libraries to be loaded
+    TargetImageList m_images;
+
+    // var table
+    PtrSet      m_ptr_set;
+    mutex_t     m_ptr_lock;
+
+    // signals
+    SignalMap m_signal_map;
+    mutex_t   m_signal_lock;
+
+    // constants for accessing device function handles
+    enum {
+        c_func_compute = 0,
+#ifdef MYO_SUPPORT
+        c_func_myo_init,
+        c_func_myo_fini,
+#endif // MYO_SUPPORT
+        c_func_init,
+        c_func_var_table_size,
+        c_func_var_table_copy,
+        c_funcs_total
+    };
+    static const char* m_func_names[c_funcs_total];
+
+    // device function handles
+    COIFUNCTION m_funcs[c_funcs_total];
+
+    // int -> name mapping for device signals
+    static const int   c_signal_max = 32;
+    static const char* c_signal_names[c_signal_max];
+};
+
+#endif // OFFLOAD_ENGINE_H_INCLUDED

Added: openmp/trunk/offload/src/offload_env.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_env.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_env.cpp (added)
+++ openmp/trunk/offload/src/offload_env.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,354 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_env.h"
+#include <string.h>
+#include <ctype.h>
+#include "offload_util.h"
+#include "liboffload_error_codes.h"
+
+// for environment variables valid on all cards
+const int MicEnvVar::any_card = -1;
+
+// Deletes every per-card record referenced from card_spec_list.
+MicEnvVar::~MicEnvVar()
+{
+    std::list<MicEnvVar::CardEnvVars*>::const_iterator card =
+        card_spec_list.begin();
+    while (card != card_spec_list.end()) {
+        delete *card;
+        ++card;
+    }
+}
+
+// Releases the definition string with free(), so env_var_value has to be
+// memory obtained from malloc()/strdup(). env_var is not released here.
+MicEnvVar::VarValue::~VarValue()
+{
+    free(env_var_value);
+}
+
+// Deletes every variable record referenced from env_vars.
+MicEnvVar::CardEnvVars::~CardEnvVars()
+{
+    std::list<MicEnvVar::VarValue*>::const_iterator var = env_vars.begin();
+    while (var != env_vars.end()) {
+        delete *var;
+        ++var;
+    }
+}
+
+// Searching for card in "card_spec_list" list with the same "number"
+
+// Returns the record for card "number": the shared record (common_vars)
+// for any_card, otherwise the first entry of card_spec_list with that card
+// number, or NULL when the card has no record.
+MicEnvVar::CardEnvVars* MicEnvVar::get_card(int number)
+{
+    if (number == any_card) {
+        return &common_vars;
+    }
+
+    std::list<MicEnvVar::CardEnvVars*>::const_iterator card =
+        card_spec_list.begin();
+    for (; card != card_spec_list.end(); ++card) {
+        if ((*card)->card_number == number) {
+            return *card;
+        }
+    }
+    return NULL;
+}
+
+// Searching for environment variable in "env_var" list with the same name
+
+// Returns the first record in env_vars whose name has exactly
+// "env_var_name_length" characters equal to those at "env_var_name",
+// or NULL when there is no such record.
+MicEnvVar::VarValue* MicEnvVar::CardEnvVars::find_var(
+    char* env_var_name,
+    int env_var_name_length
+)
+{
+    std::list<MicEnvVar::VarValue*>::const_iterator var = env_vars.begin();
+    for (; var != env_vars.end(); ++var) {
+        VarValue *candidate = *var;
+        // names are not NUL-terminated, so the lengths must agree first
+        if (candidate->length != env_var_name_length) {
+            continue;
+        }
+        if (strncmp(candidate->env_var, env_var_name,
+                    env_var_name_length) == 0) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+// Classifies one "NAME=VALUE" environment string and records it:
+// single-variable kinds go to add_env_var(), a <prefix>_<card>_ENV list is
+// handed to mic_parse_env_var_list(), anything else is ignored.
+void MicEnvVar::analyze_env_var(char *env_var_string)
+{
+    char *name;
+    char *definition;
+    int  card;
+    int  name_length;
+
+    const MicEnvVarKind kind = get_env_var_kind(env_var_string,
+                                                &card,
+                                                &name,
+                                                &name_length,
+                                                &definition);
+
+    if (kind == c_mic_var || kind == c_mic_card_var) {
+        add_env_var(card, name, name_length, definition);
+    }
+    else if (kind == c_mic_card_env) {
+        mic_parse_env_var_list(card, definition);
+    }
+    // c_no_mic and any other value: not a MIC variable, nothing to do
+}
+
+// Records the definition of one environment variable for a card.
+//   card_number          - card the definition applies to, or any_card
+//   env_var_name         - start of the variable name; the pointer itself
+//                          is stored, the characters are not copied
+//   env_var_name_length  - number of characters in the name
+//   env_var_def          - heap-allocated "name=value" string; ownership
+//                          passes to this routine (VarValue releases it
+//                          with free())
+// The first definition of a variable for a card wins; the definition
+// string of a later duplicate is released here instead of being leaked.
+void MicEnvVar::add_env_var(
+    int card_number,
+    char *env_var_name,
+    int env_var_name_length,
+    char *env_var_def
+)
+{
+    VarValue *var;
+    CardEnvVars *card;
+
+    // The case corresponds to common env var definition of kind
+    // <mic-prefix>_<var>
+    if (card_number == any_card) {
+        card = &common_vars;
+    }
+    else {
+        card = get_card(card_number);
+        if (!card) {
+            // definition for new card occurred
+            card = new CardEnvVars(card_number);
+            card_spec_list.push_back(card);
+        }
+
+    }
+    var = card->find_var(env_var_name, env_var_name_length);
+    if (!var) {
+        // put new env var definition in "env_var" list
+        var = new VarValue(env_var_name, env_var_name_length, env_var_def);
+        card->env_vars.push_back(var);
+    }
+    else {
+        // already defined: nothing keeps the new definition string
+        free(env_var_def);
+    }
+}
+
+// The routine analyses string pointed by "env_var_string" argument
+// according to the following syntax:
+//
+// Specification of prefix for MIC environment variables
+// MIC_ENV_PREFIX=<mic-prefix>
+//
+// Setting single MIC environment variable
+// <mic-prefix>_<var>=<value>
+// <mic-prefix>_<card-number>_<var>=<value>
+
+// Setting multiple MIC environment variables
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+
+// Classifies "env_var_string" according to the syntax described above.
+//   card_number          - receives the card number, or any_card when the
+//                          string carries none (written once the prefix
+//                          has matched)
+//   env_var_name         - receives the start of the variable name inside
+//                          env_var_string
+//   env_var_name_length  - receives the length of that name (not written
+//                          for c_mic_card_env)
+//   env_var_def          - for c_mic_var / c_mic_card_var a strdup() copy
+//                          of "<var>=<value>"; for c_mic_card_env a
+//                          strdup() copy of the text following "ENV="
+// Returns c_no_mic when the string is not a MIC variable (including the
+// case where no prefix has been set); the output arguments must not be
+// relied upon then.
+MicEnvVarKind MicEnvVar::get_env_var_kind(
+    char *env_var_string,
+    int *card_number,
+    char **env_var_name,
+    int *env_var_name_length,
+    char **env_var_def
+)
+{
+    // without a prefix no variable can be a MIC variable
+    if (!prefix) {
+        return c_no_mic;
+    }
+
+    int len = strlen(prefix);
+    char *c = env_var_string;
+    int num = 0;
+    bool card_is_set = false;
+
+    if (strncmp(c, prefix, len) != 0 || c[len] != '_') {
+        return c_no_mic;
+    }
+    c += len + 1;
+
+    // <ctype.h> functions require values representable as unsigned char,
+    // hence the casts below.
+    *card_number = any_card;
+    if (isdigit((unsigned char) *c)) {
+        // optional "<card-number>_" part
+        while (isdigit((unsigned char) *c)) {
+            num = (*c++ - '0') + (num * 10);
+        }
+        if (*c != '_') {
+            return c_no_mic;
+        }
+        c++;
+        *card_number = num;
+        card_is_set = true;
+    }
+    if (!isalpha((unsigned char) *c)) {
+        return c_no_mic;
+    }
+    *env_var_name = *env_var_def = c;
+
+    // "ENV" is a list of definitions only when a card number is given;
+    // <mic-prefix>_ENV=<value> is an ordinary variable named ENV and is
+    // handled by the general case below.
+    if (card_is_set && strncmp(c, "ENV=", 4) == 0) {
+        *env_var_def = strdup(c + strlen("ENV="));
+        return c_mic_card_env;
+    }
+
+    *env_var_name_length = 0;
+    while (isalnum((unsigned char) *c) || *c == '_') {
+        c++;
+        (*env_var_name_length)++;
+    }
+    if (*c != '=') {
+        return c_no_mic;
+    }
+    *env_var_def = strdup(*env_var_def);
+    return card_is_set ? c_mic_card_var : c_mic_var;
+}
+
+// analysing <env-vars> in form:
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+// where:
+//
+// <env-vars>:
+//                <env-var>
+//                <env-vars> | <env-var>
+//
+// <env-var>:
+//                variable=value
+//                variable="value"
+//                variable=
+
+// Splits "env_vars_def_list" (see the grammar above) into single
+// definitions and records each one for "card_number" via add_env_var().
+// Parsing stops with an error message at the first malformed definition;
+// definitions recorded before that point are kept.
+//
+// The variable names handed to add_env_var() point into
+// env_vars_def_list, so that buffer has to stay alive as long as the
+// recorded variables are used. Each definition string is a separate
+// malloc() copy.
+void MicEnvVar::mic_parse_env_var_list(
+    int card_number, char *env_vars_def_list)
+{
+    char *c = env_vars_def_list;
+    char *env_var_name;
+    int  env_var_name_length;
+    char *env_var_def;
+    bool var_is_quoted;
+
+    // the whole list may start with a quote
+    if (*c == '"') {
+        c++;
+    }
+    while (*c != 0) {
+        var_is_quoted = false;
+        env_var_name = c;
+        env_var_name_length = 0;
+        // <ctype.h> functions require values representable as unsigned
+        // char, hence the casts.
+        if (isalpha((unsigned char) *c)) {
+            while (isalnum((unsigned char) *c) || *c == '_') {
+                c++;
+                env_var_name_length++;
+            }
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list1);
+            return;
+        }
+        if (*c != '=') {
+            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list2);
+            return;
+        }
+        c++;
+
+        if (*c == '"') {
+            var_is_quoted = true;
+            c++;
+        }
+        // Environment variable values that contain | will need to be escaped.
+        while (*c != 0 && *c != '|' &&
+               (!var_is_quoted || *c != '"'))
+        {
+            // skip escaped symbol; a backslash that ends the string has
+            // nothing to escape and must not carry us past the terminator
+            if (*c == '\\' && c[1] != 0) {
+                c++;
+            }
+            c++;
+        }
+        if (var_is_quoted) {
+            // step over the character that ended the scan (normally the
+            // closing quote) unless the string ended first
+            if (*c != 0) {
+                c++;
+            }
+            while (*c != 0 && *c != '|') {
+                c++;
+            }
+        }
+
+        // copy "name=value" into its own NUL-terminated string;
+        // sz characters plus the terminator
+        int sz = c - env_var_name;
+        env_var_def = (char*)malloc(sz + 1);
+        memcpy(env_var_def, env_var_name, sz);
+        env_var_def[sz] = 0;
+
+        // move to the next definition, skipping blanks after the separator
+        if (*c == '|') {
+            c++;
+            while (*c != 0 && *c == ' ') {
+                c++;
+            }
+        }
+        add_env_var(card_number,
+                    env_var_name,
+                    env_var_name_length,
+                    env_var_def);
+    }
+}
+
+// Collect all definitions for the card with number "card_num".
+// The returned result is vector of string pointers defining one
+// environment variable. The vector is terminated by NULL pointer.
+// At the beginning of the vector there are env vars defined as
+// <mic-prefix>_<card-number>_<var>=<value>
+// or
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+// where <card-number> is equal to "card_num"
+// They are followed by definitions valid for any card
+// and absent in previous definitions.
+
+// Builds the NULL-terminated environment vector for card "card_num".
+// Returns NULL when no prefix is set. The vector itself is obtained from
+// malloc(); the strings in it are the env_var_value pointers of the
+// recorded variables, not copies.
+char** MicEnvVar::create_environ_for_card(int card_num)
+{
+    CardEnvVars *card = get_card(card_num);
+
+    if (!prefix) {
+        return NULL;
+    }
+    // There are no personal env var definitions for the card with
+    // number "card_num": fall back to the common definitions
+    if (!card) {
+        return create_environ_for_card(any_card);
+    }
+
+    std::list<char*> defs;
+    std::list<MicEnvVar::VarValue*>::const_iterator var;
+
+    // definitions made for this card come first
+    for (var = card->env_vars.begin(); var != card->env_vars.end(); ++var) {
+        defs.push_back((*var)->env_var_value);
+    }
+
+    // followed by the common definitions the card does not override
+    if (card_num != any_card) {
+        CardEnvVars *common = get_card(any_card);
+        for (var = common->env_vars.begin();
+             var != common->env_vars.end(); ++var) {
+            VarValue *shared = *var;
+            if (card->find_var(shared->env_var, shared->length)) {
+                continue;
+            }
+            defs.push_back(shared->env_var_value);
+        }
+    }
+
+    int count = defs.size();
+    char **rez = (char**) malloc((count + 1) * sizeof(char*));
+    std::copy(defs.begin(), defs.end(), rez);
+    rez[count] = 0;
+    return rez;
+}

Added: openmp/trunk/offload/src/offload_env.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_env.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_env.h (added)
+++ openmp/trunk/offload/src/offload_env.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,91 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_ENV_H_INCLUDED
+#define OFFLOAD_ENV_H_INCLUDED
+
+#include <list>
+
+// data structure and routines to parse MIC user environment and pass to MIC
+
+// Classification of an environment variable name with respect to the
+// <mic-prefix> naming scheme; this is the return type of
+// MicEnvVar::get_env_var_kind().
+enum MicEnvVarKind
+{
+    c_no_mic,         // not MIC env var
+    c_mic_var,        // for <mic-prefix>_<var>
+    c_mic_card_var,   // for <mic-prefix>_<card-number>_<var>
+    c_mic_card_env    // for <mic-prefix>_<card-number>_ENV
+};
+
+// Stores environment variable definitions, either for one card or for any
+// card, and builds an environment array for a given card from them.
+struct MicEnvVar {
+    MicEnvVar() : prefix(0) {}
+    ~MicEnvVar();
+
+    void analyze_env_var(char *env_var_string);
+
+    // Returns a null-terminated array of strings for the card "card_num".
+    char** create_environ_for_card(int card_num);
+
+    MicEnvVarKind get_env_var_kind(
+        char *env_var_string,
+        int *card_number,
+        char **env_var_name,
+        int *env_var_name_length,
+        char **env_var_def
+    );
+
+    void add_env_var(
+        int card_number,
+        char *env_var_name,
+        int env_var_name_length,
+        char *env_var_def
+    );
+
+    // Remembers the prefix pointer (the string is not copied). A null or
+    // empty string leaves the object without a prefix.
+    void set_prefix(const char *pref) {
+        if (pref != 0 && pref[0] != '\0') {
+            prefix = pref;
+        }
+        else {
+            prefix = 0;
+        }
+    }
+
+    // A single variable definition.
+    struct VarValue {
+        char* env_var;        // variable name, as passed to find_var()
+        int   length;         // length of the name
+        char* env_var_value;  // string placed into the environment array
+
+        VarValue(char* var, int ln, char* value)
+            : env_var(var), length(ln), env_var_value(value)
+        {
+        }
+        ~VarValue();
+    };
+
+    // The definitions collected for one card number (or for any_card).
+    struct CardEnvVars {
+        int card_number;
+        std::list<VarValue*> env_vars;
+
+        CardEnvVars() : card_number(any_card) {}
+        CardEnvVars(int num) : card_number(num) {}
+        ~CardEnvVars();
+
+        void add_new_env_var(int number, char *env_var, int length,
+                             char *env_var_value);
+        VarValue* find_var(char* env_var_name, int env_var_name_length);
+    };
+
+    // Card number standing for "valid for any card".
+    static const int any_card;
+
+private:
+    void         mic_parse_env_var_list(int card_number, char *env_var_def);
+    CardEnvVars* get_card(int number);
+
+    const char *prefix;
+    std::list<CardEnvVars *> card_spec_list;
+    CardEnvVars common_vars;
+};
+
+#endif // OFFLOAD_ENV_H_INCLUDED

Added: openmp/trunk/offload/src/offload_host.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_host.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_host.cpp (added)
+++ openmp/trunk/offload/src/offload_host.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,4360 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Forward declarations: the following two functions are declared as friends
+// in offload_engine.h, and Clang does not accept a static declaration that
+// first appears after the friend declaration.
+static void __offload_init_library_once(void);
+static void __offload_fini_library(void);
+
+#include "offload_host.h"
+#ifdef MYO_SUPPORT
+#include "offload_myo_host.h"
+#endif
+
+#include <malloc.h>
+#ifndef TARGET_WINNT
+#include <alloca.h>
+#include <elf.h>
+#endif // TARGET_WINNT
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <algorithm>
+#include <bitset>
+
+#if defined(HOST_WINNT)
+#define PATH_SEPARATOR ";"
+#else
+#define PATH_SEPARATOR ":"
+#endif
+
+// Yields the offload_number member of the record "timer_data" points to,
+// or 0 when the pointer is null. Both the argument and the whole expansion
+// are parenthesized so that the macro keeps its meaning when it is used
+// inside a larger expression or is given a non-trivial argument.
+#define GET_OFFLOAD_NUMBER(timer_data) \
+    ((timer_data) ? (timer_data)->offload_number : 0)
+
+#ifdef TARGET_WINNT
+// Small subset of ELF declarations for Windows which is needed to compile
+// this file. ELF header is used to understand what binary type is contained
+// in the target image - shared library or executable.
+
+// fixed-width scalar types used by the 64-bit ELF header
+typedef uint16_t Elf64_Half;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+
+// number of bytes in the e_ident[] array
+#define EI_NIDENT   16
+
+// values of e_type: executable file and shared object
+#define ET_EXEC     2
+#define ET_DYN      3
+
+// 64-bit ELF file header
+typedef struct
+{
+    unsigned char e_ident[EI_NIDENT];
+    Elf64_Half    e_type;
+    Elf64_Half    e_machine;
+    Elf64_Word    e_version;
+    Elf64_Addr    e_entry;
+    Elf64_Off     e_phoff;
+    Elf64_Off     e_shoff;
+    Elf64_Word    e_flags;
+    Elf64_Half    e_ehsize;
+    Elf64_Half    e_phentsize;
+    Elf64_Half    e_phnum;
+    Elf64_Half    e_shentsize;
+    Elf64_Half    e_shnum;
+    Elf64_Half    e_shstrndx;
+} Elf64_Ehdr;
+#endif // TARGET_WINNT
+
+// Host console and file logging
+// NOTE(review): this global "prefix" is a different variable from the
+// private MicEnvVar::prefix member declared in offload_env.h.
+const char *prefix;
+int console_enabled = 0;
+int offload_number = 0;
+
+// Names of environment variables, kept as pointers to immutable string
+// literals.
+static const char *htrace_envname = "H_TRACE";
+static const char *offload_report_envname = "OFFLOAD_REPORT";
+// const-qualified like its siblings: a string literal must not be reachable
+// through a pointer to non-const char.
+static const char *timer_envname = "H_TIME";
+
+// Trace information
+
+// Printable names of the transfer direction; setup_descriptors() indexes
+// this table with VarDesc::direction.bits.
+static const char* vardesc_direction_as_string[] = {
+    "NOCOPY",
+    "IN",
+    "OUT",
+    "INOUT"
+};
+
+// Printable names of the variable types; setup_descriptors() indexes this
+// table with VarDesc::type.src, VarDesc::type.dst and with the low six bits
+// of the pointer array flags, so the order of the entries must follow the
+// numeric values of those type codes.
+static const char* vardesc_type_as_string[] = {
+    "unknown",
+    "data",
+    "data_ptr",
+    "func_ptr",
+    "void_ptr",
+    "string_ptr",
+    "dv",
+    "dv_data",
+    "dv_data_slice",
+    "dv_ptr",
+    "dv_ptr_data",
+    "dv_ptr_data_slice",
+    "cean_var",
+    "cean_var_ptr",
+    "c_data_ptr_array",
+    "c_func_ptr_array",
+    "c_void_ptr_array",
+    "c_string_ptr_array"
+};
+
+// Process-wide globals of the host runtime
+Engine*         mic_engines = 0;
+uint32_t        mic_engines_total = 0;
+pthread_key_t   mic_thread_key;
+MicEnvVar       mic_env_vars;
+uint64_t        cpu_frequency = 0;
+
+// MIC_STACKSIZE
+// default is 12 * 2^20
+uint32_t mic_stack_size = 12 * 1024 * 1024;
+
+// MIC_BUFFERSIZE
+uint64_t mic_buffer_size = 0;
+
+// MIC_LD_LIBRARY_PATH
+char* mic_library_path = 0;
+
+// MIC_PROXY_IO
+bool mic_proxy_io = true;
+
+// MIC_PROXY_FS_ROOT
+char* mic_proxy_fs_root = 0;
+
+// Threshold for creating buffers with large pages. Buffer is created
+// with large pages hint if its size exceeds the threshold value.
+// By default large pages are disabled right now (by setting default
+// value for threshold to MAX) due to HSD 4114629.
+uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
+static const char *mic_use_2mb_buffers_envname  =
+    "MIC_USE_2MB_BUFFERS";
+
+// default is 2 * 2^20
+static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
+static const char *mic_use_async_buffer_write_envname  =
+    "MIC_USE_ASYNC_BUFFER_WRITE";
+
+// default is 2 * 2^20
+static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
+static const char *mic_use_async_buffer_read_envname  =
+    "MIC_USE_ASYNC_BUFFER_READ";
+
+// device initialization type
+OffloadInitType __offload_init_type = c_init_on_offload_all;
+static const char *offload_init_envname = "OFFLOAD_INIT";
+
+// active wait
+static bool __offload_active_wait = true;
+static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
+
+// OMP_DEFAULT_DEVICE
+int __omp_device_num = 0;
+static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
+
+// The list of pending target libraries
+static bool            __target_libs;
+static TargetImageList __target_libs_list;
+static mutex_t         __target_libs_lock;
+// taken by OffloadDescriptor::offload_stack_memory_manager()
+static mutex_t         stack_alloc_lock;
+
+// Target executable
+TargetImage*           __target_exe;
+
+// Returns the base address of the source data that "ptr" stands for, given
+// the variable type code "type". Types that are neither pointers, scalars
+// nor dv data (slices) yield NULL.
+static char * offload_get_src_base(void * ptr, uint8_t type)
+{
+    if (VAR_TYPE_IS_PTR(type)) {
+        // ptr is the address of a pointer variable: use the pointer value
+        return *static_cast<char**>(ptr);
+    }
+    if (VAR_TYPE_IS_SCALAR(type)) {
+        // the variable itself is the data
+        return static_cast<char*>(ptr);
+    }
+
+    const bool is_slice = VAR_TYPE_IS_DV_DATA_SLICE(type);
+    if (!is_slice && !VAR_TYPE_IS_DV_DATA(type)) {
+        return NULL;
+    }
+
+    ArrDesc *desc;
+    if (is_slice) {
+        // ptr is an array descriptor whose base refers to the ArrDesc,
+        // either directly or through one more pointer
+        const arr_desc *section = static_cast<const arr_desc*>(ptr);
+        if (type == c_dv_data_slice) {
+            desc = reinterpret_cast<ArrDesc*>(section->base);
+        }
+        else {
+            desc = *reinterpret_cast<ArrDesc**>(section->base);
+        }
+    }
+    else if (type == c_dv_data) {
+        desc = static_cast<ArrDesc*>(ptr);
+    }
+    else {
+        desc = *static_cast<ArrDesc**>(ptr);
+    }
+    return reinterpret_cast<char*>(desc->Base);
+}
+
+// Reports a failed COI operation and terminates the process with exit
+// status 1. "msg" selects the message; "res" is the COI result code.
+void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
+{
+    // special case for the 'process died' error: no message is printed here
+    if (res == COI_PROCESS_DIED) {
+        m_device.fini_process(true);
+        exit(1);
+    }
+
+    switch (msg) {
+        // messages that carry the logical device index and the result code
+        case c_buf_create:
+        case c_buf_create_from_mem:
+        case c_buf_get_address:
+        case c_pipeline_create:
+        case c_pipeline_run_func: {
+            // a buffer creation that ran out of memory has its own message
+            const error_types reported =
+                (msg == c_buf_create && res == COI_OUT_OF_MEMORY) ?
+                c_buf_create_out_of_mem : msg;
+            LIBOFFLOAD_ERROR(reported, m_device.get_logical_index(), res);
+            break;
+        }
+
+        // messages that carry only the result code
+        case c_buf_read:
+        case c_buf_write:
+        case c_buf_copy:
+        case c_buf_map:
+        case c_buf_unmap:
+        case c_buf_destroy:
+        case c_buf_set_state:
+            LIBOFFLOAD_ERROR(msg, res);
+            break;
+
+        // any other message code is not printed
+        default:
+            break;
+    }
+
+    exit(1);
+}
+
+// Maps a COI result code to the offload result stored in the status
+// structure. Codes without a dedicated counterpart become OFFLOAD_ERROR.
+_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
+{
+    if (res == COI_SUCCESS) {
+        return OFFLOAD_SUCCESS;
+    }
+    if (res == COI_PROCESS_DIED) {
+        return OFFLOAD_PROCESS_DIED;
+    }
+    if (res == COI_OUT_OF_MEMORY) {
+        return OFFLOAD_OUT_OF_MEMORY;
+    }
+    return OFFLOAD_ERROR;
+}
+
+// Creates, or looks up, the association between the host memory starting at
+// "base" and its buffers.
+//
+// ptr_data   - receives the entry returned by m_device.insert_ptr_data()
+// base       - host base address of the data
+// disp, size - their sum is the length that has to be covered from base
+// alloc_disp - subtracted from the size of the sink buffer and stored in
+//              a newly created entry
+// align      - requested alignment; only a positive power of two enables
+//              the mic_offset optimization
+//
+// Returns true on success. Returns false when creating the sink buffer or
+// setting its state fails (the translated result is stored in m_status when
+// a status is present; otherwise report_coi_error() is called for mandatory
+// offloads) or when init_static_ptr_data() fails for an existing entry.
+// Exits the process when an existing entry does not contain the requested
+// address range.
+bool OffloadDescriptor::alloc_ptr_data(
+    PtrData* &ptr_data,
+    void *base,
+    int64_t disp,
+    int64_t size,
+    int64_t alloc_disp,
+    int align
+)
+{
+    // total length of base
+    int64_t length = disp + size;
+    bool is_new;
+
+    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
+                  base, length);
+
+    // add new entry
+    ptr_data = m_device.insert_ptr_data(base, length, is_new);
+    if (is_new) {
+        // NOTE(review): every exit from this branch unlocks
+        // alloc_ptr_data_lock, so insert_ptr_data() presumably returns a
+        // new entry with that lock already held - confirm.
+
+        OFFLOAD_TRACE(3, "Added new association\n");
+
+        if (length > 0) {
+            OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
+            COIRESULT res;
+
+            // align should be a power of 2
+            if (align > 0 && (align & (align - 1)) == 0) {
+                // offset within mic_buffer. Can do offset optimization
+                // only when source address alignment satisfies requested
+                // alignment on the target (cq172736).
+                if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
+                    // keep the low 12 bits of the host address as the offset
+                    ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
+                }
+            }
+
+            // buffer size and flags
+            uint64_t buffer_size = length + ptr_data->mic_offset;
+            uint32_t buffer_flags = 0;
+
+            // create buffer with large pages if data length exceeds
+            // large page threshold
+            if (length >= __offload_use_2mb_buffers) {
+                buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+            }
+
+            // create CPU buffer
+            OFFLOAD_DEBUG_TRACE_1(3,
+                          GET_OFFLOAD_NUMBER(get_timer_data()),
+                          c_offload_create_buf_host,
+                          "Creating buffer from source memory %p, "
+                          "length %lld\n", base, length);
+
+            // result is not checked because we can continue without cpu
+            // buffer. In this case we will use COIBufferRead/Write instead
+            // of COIBufferCopy.
+            COI::BufferCreateFromMemory(length,
+                                        COI_BUFFER_NORMAL,
+                                        0,
+                                        base,
+                                        1,
+                                        &m_device.get_process(),
+                                        &ptr_data->cpu_buf);
+
+            OFFLOAD_DEBUG_TRACE_1(3,
+                          GET_OFFLOAD_NUMBER(get_timer_data()),
+                          c_offload_create_buf_mic,
+                          "Creating buffer for sink: size %lld, offset %d, "
+                          "flags =0x%x\n", buffer_size - alloc_disp,
+                          ptr_data->mic_offset, buffer_flags);
+
+            // create MIC buffer
+            res = COI::BufferCreate(buffer_size - alloc_disp,
+                                    COI_BUFFER_NORMAL,
+                                    buffer_flags,
+                                    0,
+                                    1,
+                                    &m_device.get_process(),
+                                    &ptr_data->mic_buf);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                }
+                else if (m_is_mandatory) {
+                    report_coi_error(c_buf_create, res);
+                }
+                ptr_data->alloc_ptr_data_lock.unlock();
+                return false;
+            }
+
+            // make buffer valid on the device.
+            res = COI::BufferSetState(ptr_data->mic_buf,
+                                      m_device.get_process(),
+                                      COI_BUFFER_VALID,
+                                      COI_BUFFER_NO_MOVE,
+                                      0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                }
+                else if (m_is_mandatory) {
+                    report_coi_error(c_buf_set_state, res);
+                }
+                ptr_data->alloc_ptr_data_lock.unlock();
+                return false;
+            }
+
+            // and mark it invalid on the source side
+            res = COI::BufferSetState(ptr_data->mic_buf,
+                                      COI_PROCESS_SOURCE,
+                                      COI_BUFFER_INVALID,
+                                      COI_BUFFER_NO_MOVE,
+                                      0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                }
+                else if (m_is_mandatory) {
+                    report_coi_error(c_buf_set_state, res);
+                }
+                ptr_data->alloc_ptr_data_lock.unlock();
+                return false;
+            }
+        }
+
+        ptr_data->alloc_disp = alloc_disp;
+        ptr_data->alloc_ptr_data_lock.unlock();
+    }
+    else {
+        mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);
+
+        OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
+                      "is_static %d\n",
+                      ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+                      ptr_data->is_static);
+
+        // This is not a new entry. Make sure that provided address range fits
+        // into existing one.
+        MemRange addr_range(base, length - ptr_data->alloc_disp);
+        if (!ptr_data->cpu_addr.contains(addr_range)) {
+            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+            exit(1);
+        }
+
+        // if the entry is associated with static data it may not have buffers
+        // created because they are created on demand.
+        if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Looks up the existing association for the host address "base".
+//
+// On return "ptr_data" is the entry found, or 0 when there is none or when
+// the entry does not contain the range of disp + size bytes starting at
+// base. With "report_error" set, both of these cases print an error and
+// exit the process instead. Returns false only when the buffers of a static
+// entry cannot be created; otherwise returns true.
+bool OffloadDescriptor::find_ptr_data(
+    PtrData* &ptr_data,
+    void *base,
+    int64_t disp,
+    int64_t size,
+    bool report_error
+)
+{
+    // number of bytes, counted from base, the association has to cover
+    int64_t span = disp + size;
+
+    OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
+                  "length %lld\n", base, span);
+
+    // look the base address up in the pointer table
+    ptr_data = m_device.find_ptr_data(base);
+    if (ptr_data == 0) {
+        if (!report_error) {
+            OFFLOAD_TRACE(3, "Association does not exist\n");
+            return true;
+        }
+        LIBOFFLOAD_ERROR(c_no_ptr_data, base);
+        exit(1);
+    }
+
+    OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
+                  ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+                  ptr_data->is_static);
+
+    // the requested range has to lie inside the registered one
+    MemRange requested(base, span);
+    if (!ptr_data->cpu_addr.contains(requested)) {
+        if (!report_error) {
+            OFFLOAD_TRACE(3, "Existing association partially overlaps with "
+                          "data address range\n");
+            ptr_data = 0;
+            return true;
+        }
+        LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+        exit(1);
+    }
+
+    // buffers of entries for static data are created on demand, so they may
+    // still be missing at this point
+    return !ptr_data->is_static || init_static_ptr_data(ptr_data);
+}
+
+// Creates the buffers of an association that does not have them yet: a
+// buffer over the host memory (cpu_buf) and a buffer over the sink memory
+// at mic_addr (mic_buf). A buffer that already exists is left untouched.
+//
+// Returns true on success. When a buffer cannot be created, the translated
+// result is stored in m_status and false is returned if a status is
+// present; otherwise report_coi_error() is called.
+bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
+
+    if (ptr_data->cpu_buf == 0) {
+        // cpu_addr.start() is a pointer, so it is printed with %p as in the
+        // other trace messages of this file
+        OFFLOAD_TRACE(3, "Creating buffer from source memory %p\n",
+                      ptr_data->cpu_addr.start());
+
+        COIRESULT res = COI::BufferCreateFromMemory(
+            ptr_data->cpu_addr.length(),
+            COI_BUFFER_NORMAL,
+            0,
+            const_cast<void*>(ptr_data->cpu_addr.start()),
+            1, &m_device.get_process(),
+            &ptr_data->cpu_buf);
+
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_buf_create_from_mem, res);
+        }
+    }
+
+    if (ptr_data->mic_buf == 0) {
+        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
+                      ptr_data->mic_addr);
+
+        // same length as the host range, but placed over the sink address
+        COIRESULT res = COI::BufferCreateFromMemory(
+            ptr_data->cpu_addr.length(),
+            COI_BUFFER_NORMAL,
+            COI_SINK_MEMORY,
+            reinterpret_cast<void*>(ptr_data->mic_addr),
+            1, &m_device.get_process(),
+            &ptr_data->mic_buf);
+
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_buf_create_from_mem, res);
+        }
+    }
+
+    return true;
+}
+
+// Fills in ptr_data->mic_addr from the sink buffer when the buffer exists
+// and the address is still zero. Returns false if the address cannot be
+// obtained; true otherwise, including when there is nothing to do.
+bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
+{
+    // no buffer to ask, or the address is already known
+    if (ptr_data->mic_buf == 0 || ptr_data->mic_addr != 0) {
+        return true;
+    }
+
+    COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
+                                              &ptr_data->mic_addr);
+    if (res == COI_SUCCESS) {
+        return true;
+    }
+
+    if (m_status != 0) {
+        m_status->result = translate_coi_error(res);
+    }
+    else if (m_is_mandatory) {
+        report_coi_error(c_buf_get_address, res);
+    }
+    return false;
+}
+
+// Writes "size" zero bytes to the start of the buffer "targ_buf".
+//
+// Returns true on success. On failure the translated result is stored in
+// m_status and false is returned if a status is present; otherwise
+// report_coi_error() is called. A failure to allocate the temporary block
+// of zeros is handled the same way, with COI_OUT_OF_MEMORY as result code.
+bool OffloadDescriptor::nullify_target_stack(
+    COIBUFFER targ_buf,
+    uint64_t size
+)
+{
+    COIRESULT res = COI_OUT_OF_MEMORY;
+
+    // calloc() hands out zero-filled memory, so no separate memset() is
+    // needed and a null pointer is never written through
+    char * ptr = (char*) calloc(1, size);
+
+    // a null result for a zero-sized request is not an allocation failure
+    if (ptr != 0 || size == 0) {
+        res = COI::BufferWrite(
+            targ_buf,
+            0,
+            ptr,
+            size,
+            COI_COPY_UNSPECIFIED,
+            0, 0, 0);
+    }
+    free(ptr);
+
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+            return false;
+        }
+        report_coi_error(c_buf_write, res);
+    }
+    return true;
+}
+
+// Selects the persistent stack buffer for the offload whose host stack
+// starts at "stack_begin" and which belongs to routine "routine_id".
+//
+// Entries of m_device.m_persist_list are visited from the front. Entries
+// with a stack address below stack_begin, and an entry with the same
+// address but a different routine id, are queued in m_destroy_stack and
+// erased from the list. An entry with the same address and routine id is
+// reused. Otherwise a new buffer of "buf_size" bytes is created, filled
+// with zeros and put at the front of the list.
+//
+// On success m_stack_ptr_data refers to the selected stack data, *is_new
+// tells whether the buffer was created by this call, and true is returned.
+// Returns false when a COI operation fails. The whole function runs with
+// stack_alloc_lock held.
+//
+// NOTE(review): "align" is not used in this function.
+bool OffloadDescriptor::offload_stack_memory_manager(
+    const void * stack_begin,
+    int  routine_id,
+    int  buf_size,
+    int  align,
+    bool *is_new)
+{
+    mutex_locker_t locker(stack_alloc_lock);
+
+    PersistData * new_el;
+    PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
+    // last entry marked for destruction; only meaningful when erase > 0
+    PersistDataList::iterator it_end;
+    // number of entries marked for destruction
+    int erase = 0;
+
+    *is_new = false;
+
+    for (PersistDataList::iterator it = m_device.m_persist_list.begin();
+        it != m_device.m_persist_list.end(); it++) {
+        PersistData cur_el = *it;
+
+        if (stack_begin > it->stack_cpu_addr) {
+            // this stack data must be destroyed
+            m_destroy_stack.push_front(cur_el.stack_ptr_data);
+            it_end = it;
+            erase++;
+        }
+        else if (stack_begin == it->stack_cpu_addr) {
+            if (routine_id != it-> routine_id) {
+                // this stack data must be destroyed
+                m_destroy_stack.push_front(cur_el.stack_ptr_data);
+                it_end = it;
+                erase++;
+                break;
+            }
+            else {
+                // stack data is reused
+                m_stack_ptr_data = it->stack_ptr_data;
+                if (erase > 0) {
+                    // all obsolete stack sections must be erased from the list
+                    m_device.m_persist_list.erase(it_begin, ++it_end);
+
+                    // new_el is only named inside sizeof here, so it is
+                    // not evaluated although it is still unset
+                    m_in_datalen +=
+                        erase * sizeof(new_el->stack_ptr_data->mic_addr);
+                }
+                OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
+                                 m_stack_ptr_data->mic_addr);
+                return true;
+            }
+        }
+        else if (stack_begin < it->stack_cpu_addr) {
+            break;
+        }
+    }
+
+    if (erase > 0) {
+        // all obsolete stack sections must be erased from the list
+        m_device.m_persist_list.erase(it_begin, ++it_end);
+        m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
+    }
+    // new stack table is created
+    // NOTE(review): new_el is not deleted on the failure returns below, nor
+    // after its copy has been pushed into the list - confirm the intended
+    // ownership.
+    new_el = new PersistData(stack_begin, routine_id, buf_size);
+    // create MIC buffer
+    COIRESULT res;
+    uint32_t buffer_flags = 0;
+
+    // create buffer with large pages if data length exceeds
+    // large page threshold
+    if (buf_size >= __offload_use_2mb_buffers) {
+        buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+    }
+    res = COI::BufferCreate(buf_size,
+        COI_BUFFER_NORMAL,
+        buffer_flags,
+        0,
+        1,
+        &m_device.get_process(),
+        &new_el->stack_ptr_data->mic_buf);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+        }
+        else if (m_is_mandatory) {
+            report_coi_error(c_buf_create, res);
+        }
+        return false;
+    }
+    // make buffer valid on the device.
+    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
+        m_device.get_process(),
+        COI_BUFFER_VALID,
+        COI_BUFFER_NO_MOVE,
+        0, 0, 0);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+        }
+        else if (m_is_mandatory) {
+            report_coi_error(c_buf_set_state, res);
+        }
+        return false;
+    }
+    // and mark it invalid on the source side
+    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
+        COI_PROCESS_SOURCE,
+        COI_BUFFER_INVALID,
+        COI_BUFFER_NO_MOVE,
+        0, 0, 0);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+        }
+        else if (m_is_mandatory) {
+            report_coi_error(c_buf_set_state, res);
+        }
+        return false;
+    }
+    // persistence algorithm requires target stack initially to be nullified
+    if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
+        return false;
+    }
+
+    m_stack_ptr_data = new_el->stack_ptr_data;
+    // NOTE(review): the result of init_mic_address() is ignored, and the
+    // second call below is made with the same pointer value.
+    init_mic_address(m_stack_ptr_data);
+    OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
+                      m_stack_ptr_data->mic_addr);
+    m_device.m_persist_list.push_front(*new_el);
+    init_mic_address(new_el->stack_ptr_data);
+    *is_new = true;
+    return true;
+}
+
+bool OffloadDescriptor::setup_descriptors(
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    COIRESULT res;
+
+    OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
+
+    // make a copy of variable descriptors
+    m_vars_total = vars_total;
+    if (vars_total > 0) {
+        m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
+        memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
+        m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
+    }
+
+    // dependencies
+    m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total  + 1));
+    if (m_vars_total > 0) {
+        m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
+    }
+
+    // copyin/copyout data length
+    m_in_datalen = 0;
+    m_out_datalen = 0;
+
+    // First pass over variable descriptors
+    // - Calculate size of the input and output non-pointer data
+    // - Allocate buffers for input and output pointers
+    for (int i = 0; i < m_vars_total; i++) {
+        void*   alloc_base = NULL;
+        int64_t alloc_disp = 0;
+        int64_t alloc_size;
+        bool    src_is_for_mic = (m_vars[i].direction.out ||
+                                  m_vars[i].into == NULL);
+
+        const char *var_sname = "";
+        if (vars2 != NULL && i < vars_total) {
+            if (vars2[i].sname != NULL) {
+                var_sname = vars2[i].sname;
+            }
+        }
+        OFFLOAD_TRACE(2, "   VarDesc %d, var=%s, %s, %s\n",
+            i, var_sname,
+            vardesc_direction_as_string[m_vars[i].direction.bits],
+            vardesc_type_as_string[m_vars[i].type.src]);
+        if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
+            OFFLOAD_TRACE(2, "              into=%s, %s\n", vars2[i].dname,
+                vardesc_type_as_string[m_vars[i].type.dst]);
+        }
+        OFFLOAD_TRACE(2,
+            "              type_src=%d, type_dstn=%d, direction=%d, "
+            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
+            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
+            m_vars[i].type.src,
+            m_vars[i].type.dst,
+            m_vars[i].direction.bits,
+            m_vars[i].alloc_if,
+            m_vars[i].free_if,
+            m_vars[i].align,
+            m_vars[i].mic_offset,
+            m_vars[i].flags.bits,
+            m_vars[i].offset,
+            m_vars[i].size,
+            m_vars[i].count,
+            m_vars[i].ptr,
+            m_vars[i].into);
+
+        if (m_vars[i].alloc != NULL) {
+            // array descriptor
+            const arr_desc *ap =
+                static_cast<const arr_desc*>(m_vars[i].alloc);
+
+            // debug dump
+            __arr_desc_dump("    ", "ALLOC", ap, 0);
+
+            __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
+
+            alloc_base = reinterpret_cast<void*>(ap->base);
+        }
+
+        m_vars_extra[i].cpu_disp = 0;
+        m_vars_extra[i].cpu_offset = 0;
+        m_vars_extra[i].src_data = 0;
+        m_vars_extra[i].read_rng_src = 0;
+        m_vars_extra[i].read_rng_dst = 0;
+        // flag is_arr_ptr_el is 1 only for var_descs generated
+        // for c_data_ptr_array type
+        if (i < vars_total) {
+            m_vars_extra[i].is_arr_ptr_el = 0;
+        }
+
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                {
+                    const arr_desc *ap;
+                    const VarDesc3 *vd3 =
+                        static_cast<const VarDesc3*>(m_vars[i].ptr);
+                    int flags = vd3->array_fields;
+                    OFFLOAD_TRACE(2,
+                        "              pointer array flags = %04x\n", flags);
+                    OFFLOAD_TRACE(2,
+                        "              pointer array type is %s\n",
+                        vardesc_type_as_string[flags & 0x3f]);
+                    ap = static_cast<const arr_desc*>(vd3->ptr_array);
+                    __arr_desc_dump("              ", "ptr array", ap, 0);
+                    if (m_vars[i].into) {
+                        ap = static_cast<const arr_desc*>(m_vars[i].into);
+                        __arr_desc_dump(
+                            "              ", "into array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_align_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->align_array);
+                        __arr_desc_dump(
+                            "              ", "align array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+                        __arr_desc_dump(
+                            "              ", "alloc_if array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_free_if_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->free_if_array);
+                        __arr_desc_dump(
+                            "              ", "free_if array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_extent_start_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->extent_start);
+                        __arr_desc_dump(
+                            "              ", "extent_start array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_extent_start_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              extent_start scalar = %d\n",
+                            (int64_t)vd3->extent_start);
+                    }
+                    if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>
+                            (vd3->extent_elements);
+                        __arr_desc_dump(
+                            "              ", "extent_elements array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_extent_elements_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              extent_elements scalar = %d\n",
+                            (int64_t)vd3->extent_elements);
+                    }
+                    if ((flags & (1<<flag_into_start_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->into_start);
+                        __arr_desc_dump(
+                            "              ", "into_start array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_into_start_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              into_start scalar = %d\n",
+                            (int64_t)vd3->into_start);
+                    }
+                    if ((flags & (1<<flag_into_elements_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->into_elements);
+                        __arr_desc_dump(
+                            "              ", "into_elements array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_into_elements_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              into_elements scalar = %d\n",
+                            (int64_t)vd3->into_elements);
+                    }
+                    if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->alloc_start);
+                        __arr_desc_dump(
+                            "              ", "alloc_start array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_alloc_start_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              alloc_start scalar = %d\n",
+                            (int64_t)vd3->alloc_start);
+                    }
+                    if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+                        __arr_desc_dump(
+                            "              ", "alloc_elements array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_alloc_elements_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              alloc_elements scalar = %d\n",
+                            (int64_t)vd3->alloc_elements);
+                    }
+                }
+                if (!gen_var_descs_for_pointer_array(i)) {
+                    return false;
+                }
+                break;
+
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                // In all uses later
+                // VarDesc.size will have the length of the data to be
+                // transferred
+                // VarDesc.disp will have an offset from base
+                if (m_vars[i].type.src == c_cean_var) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                    // debug dump
+                    __arr_desc_dump("", "IN/OUT", ap, 0);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, m_vars[i].disp,
+                                                 m_vars[i].size);
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_src =
+                            init_read_ranges_arr_desc(ap);
+                    }
+                    // all necessary information about length and offset is
+                    // transferred in var descriptor. There is no need to send
+                    // array descriptor to the target side.
+                    m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
+                }
+                else {
+                    m_vars[i].size *= m_vars[i].count;
+                    m_vars[i].disp = 0;
+                }
+
+                if (m_vars[i].direction.bits) {
+                    // make sure that transfer size > 0
+                    if (m_vars[i].size <= 0) {
+                        LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
+                        exit(1);
+                    }
+
+                    if (m_vars[i].flags.is_static) {
+                        PtrData *ptr_data;
+
+                        // find data associated with variable
+                        if (!find_ptr_data(ptr_data,
+                                           m_vars[i].ptr,
+                                           m_vars[i].disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+
+                        if (ptr_data != 0) {
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            m_vars[i].offset =
+                                (char*) m_vars[i].ptr -
+                                (char*) ptr_data->cpu_addr.start();
+                        }
+                        else {
+                            m_vars[i].flags.is_static = false;
+                            if (m_vars[i].into == NULL) {
+                                m_vars[i].flags.is_static_dstn = false;
+                            }
+                        }
+                        m_vars_extra[i].src_data = ptr_data;
+                    }
+
+                    if (m_is_openmp) {
+                        if (m_vars[i].flags.is_static) {
+                            // Static data is transferred only by omp target
+                            // update construct which passes zeros for
+                            // alloc_if and free_if.
+                            if (m_vars[i].alloc_if || m_vars[i].free_if) {
+                                m_vars[i].direction.bits = c_parameter_nocopy;
+                            }
+                        }
+                        else {
+                            AutoData *auto_data;
+                            if (m_vars[i].alloc_if) {
+                                auto_data = m_device.insert_auto_data(
+                                    m_vars[i].ptr, m_vars[i].size);
+                                auto_data->add_reference();
+                            }
+                            else {
+                                // TODO: what should be done if var is not in
+                                // the table?
+                                auto_data = m_device.find_auto_data(
+                                    m_vars[i].ptr);
+                            }
+
+                            // For automatic variables data is transferred
+                            // only if alloc_if == 0 && free_if == 0
+                            // or reference count is 1
+                            if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
+                                auto_data != 0 &&
+                                auto_data->get_reference() != 1) {
+                                m_vars[i].direction.bits = c_parameter_nocopy;
+                            }
+
+                            // save data for later use
+                            m_vars_extra[i].auto_data = auto_data;
+                        }
+                    }
+
+                    if (m_vars[i].direction.in &&
+                        !m_vars[i].flags.is_static) {
+                        m_in_datalen += m_vars[i].size;
+
+                        // for non-static target destination defined as CEAN
+                        // expression we pass to target its size and dist
+                        if (m_vars[i].into == NULL &&
+                            m_vars[i].type.src == c_cean_var) {
+                            m_in_datalen += 2 * sizeof(uint64_t);
+                        }
+                        m_need_runfunction = true;
+                    }
+                    if (m_vars[i].direction.out &&
+                        !m_vars[i].flags.is_static) {
+                        m_out_datalen += m_vars[i].size;
+                        m_need_runfunction = true;
+                    }
+                }
+                break;
+
+            case c_dv:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
+
+                    // debug dump
+                    __dv_desc_dump("IN/OUT", dvp);
+
+                    // send dope vector contents excluding base
+                    m_in_datalen += m_vars[i].size - sizeof(uint64_t);
+                    m_need_runfunction = true;
+                }
+                break;
+
+            case c_string_ptr:
+                if ((m_vars[i].direction.bits ||
+                     m_vars[i].alloc_if ||
+                     m_vars[i].free_if) &&
+                    m_vars[i].size == 0) {
+                    m_vars[i].size = 1;
+                    m_vars[i].count =
+                        strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
+                }
+                /* fallthru */
+
+            case c_data_ptr:
+                if (m_vars[i].flags.is_stack_buf &&
+                    !m_vars[i].direction.bits &&
+                    m_vars[i].alloc_if) {
+                    // this var_desc is for stack buffer
+                    bool is_new;
+
+                    if (!offload_stack_memory_manager(
+                            stack_addr, entry_id,
+                            m_vars[i].count, m_vars[i].align, &is_new)) {
+                        return false;
+                    }
+                    if (is_new) {
+                        m_compute_buffers.push_back(
+                            m_stack_ptr_data->mic_buf);
+                        m_device.m_persist_list.front().cpu_stack_addr =
+                            static_cast<char*>(m_vars[i].ptr);
+                    }
+                    else {
+                        m_vars[i].flags.sink_addr = 1;
+                        m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
+                    }
+                    m_vars[i].size = m_destroy_stack.size();
+                    m_vars_extra[i].src_data = m_stack_ptr_data;
+                    // need to add reference for buffer
+                    m_need_runfunction = true;
+                    break;
+                }
+                /* fallthru */
+
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].type.src == c_cean_var_ptr) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                    // debug dump
+                    __arr_desc_dump("", "IN/OUT", ap, 1);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, m_vars[i].disp,
+                                                 m_vars[i].size);
+
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_src =
+                            init_read_ranges_arr_desc(ap);
+                    }
+                    // all necessary information about length and offset is
+                    // transferred in var descriptor. There is no need to send
+                    // array descriptor to the target side.
+                    m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
+                }
+                else if (m_vars[i].type.src == c_dv_ptr) {
+                    // need to send DV to the device unless it is 'nocopy'
+                    if (m_vars[i].direction.bits ||
+                        m_vars[i].alloc_if ||
+                        m_vars[i].free_if) {
+                        ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
+
+                        // debug dump
+                        __dv_desc_dump("IN/OUT", dvp);
+
+                        m_vars[i].direction.bits = c_parameter_in;
+                    }
+
+                    // no displacement
+                    m_vars[i].disp = 0;
+                }
+                else {
+                    // c_data_ptr or c_string_ptr
+                    m_vars[i].size *= m_vars[i].count;
+                    m_vars[i].disp = 0;
+                }
+
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    PtrData *ptr_data;
+
+                    // check that buffer length >= 0
+                    if (m_vars[i].alloc_if &&
+                        m_vars[i].disp + m_vars[i].size < 0) {
+                        LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
+                        exit(1);
+                    }
+
+                    // base address
+                    void *base = *static_cast<void**>(m_vars[i].ptr);
+
+                    // allocate buffer if we have no INTO and don't need
+                    // allocation for the ptr at target
+                    if (src_is_for_mic) {
+                        if (m_vars[i].flags.is_stack_buf) {
+                            // for stack persistent objects ptr data is created
+                            // by var_desc with number 0.
+                            // Its ptr_data is stored at m_stack_ptr_data
+                            ptr_data = m_stack_ptr_data;
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+                        else if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : m_vars[i].disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : m_vars[i].size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf != 0) {
+                                // add buffer to the list of buffers that
+                                // are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            bool error_if_not_found = true;
+                            if (m_is_openmp) {
+                                // For omp target update variable is ignored
+                                // if it does not exist.
+                                if (!m_vars[i].alloc_if &&
+                                    !m_vars[i].free_if) {
+                                    error_if_not_found = false;
+                                }
+                            }
+
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data,
+                                               base,
+                                               m_vars[i].disp,
+                                               m_vars[i].size,
+                                               error_if_not_found)) {
+                                return false;
+                            }
+
+                            if (m_is_openmp) {
+                                // make var nocopy if it does not exist
+                                if (ptr_data == 0) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data != 0) {
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+                        }
+
+                        if (ptr_data != 0) {
+                            if (m_is_openmp) {
+                                // data is transferred only if
+                                // alloc_if == 0 && free_if == 0
+                                // or reference count is 1
+                                if ((m_vars[i].alloc_if ||
+                                     m_vars[i].free_if) &&
+                                    ptr_data->get_reference() != 1) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data->alloc_disp != 0) {
+                                m_vars[i].flags.alloc_disp = 1;
+                                m_in_datalen += sizeof(alloc_disp);
+                            }
+
+                            if (m_vars[i].flags.sink_addr) {
+                                // get buffer's address on the sink
+                                if (!init_mic_address(ptr_data)) {
+                                    return false;
+                                }
+
+                                m_in_datalen += sizeof(ptr_data->mic_addr);
+                            }
+
+                            if (!ptr_data->is_static && m_vars[i].free_if) {
+                                // need to decrement buffer reference on target
+                                m_need_runfunction = true;
+                            }
+
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            m_vars[i].offset = (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+
+                            // copy other pointer properties to var descriptor
+                            m_vars[i].mic_offset = ptr_data->mic_offset;
+                            m_vars[i].flags.is_static = ptr_data->is_static;
+                        }
+                    }
+                    else {
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           m_vars[i].disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+                        if (ptr_data) {
+                            m_vars[i].offset =
+                                (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+                        }
+                    }
+
+                    // save pointer data
+                    m_vars_extra[i].src_data = ptr_data;
+                }
+                break;
+
+            case c_func_ptr:
+                if (m_vars[i].direction.in) {
+                    m_in_datalen += __offload_funcs.max_name_length();
+                }
+                if (m_vars[i].direction.out) {
+                    m_out_datalen += __offload_funcs.max_name_length();
+                }
+                m_need_runfunction = true;
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                ArrDesc *dvp;
+                if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+                    const arr_desc *ap;
+                    ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                    dvp = (m_vars[i].type.src == c_dv_data_slice) ?
+                          reinterpret_cast<ArrDesc*>(ap->base) :
+                          *reinterpret_cast<ArrDesc**>(ap->base);
+                }
+                else {
+                    dvp = (m_vars[i].type.src == c_dv_data) ?
+                          static_cast<ArrDesc*>(m_vars[i].ptr) :
+                          *static_cast<ArrDesc**>(m_vars[i].ptr);
+                }
+
+                // if allocatable dope vector isn't allocated don't
+                // transfer its data
+                if (!__dv_is_allocated(dvp)) {
+                    m_vars[i].direction.bits = c_parameter_nocopy;
+                    m_vars[i].alloc_if = 0;
+                    m_vars[i].free_if = 0;
+                }
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    const arr_desc *ap;
+
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+                        ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                        // debug dump
+                        __arr_desc_dump("", "IN/OUT", ap, 0);
+                    }
+                    if (!__dv_is_contiguous(dvp)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_src =
+                            init_read_ranges_dv(dvp);
+                    }
+
+                    // size and displacement
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+                        // offset and length are derived from the
+                        // array descriptor
+                        __arr_data_offset_and_length(ap,
+                                                     m_vars[i].disp,
+                                                     m_vars[i].size);
+                        if (m_vars[i].direction.bits) {
+                            if (!is_arr_desc_contiguous(ap)) {
+                                if (m_vars[i].flags.is_noncont_src) {
+                                    LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
+                                    return false;
+                                }
+                                m_vars[i].flags.is_noncont_src = 1;
+                                m_vars_extra[i].read_rng_src =
+                                    init_read_ranges_arr_desc(ap);
+                            }
+                        }
+                    }
+                    else {
+                        if (m_vars[i].flags.has_length) {
+                            m_vars[i].size =
+                                __dv_data_length(dvp, m_vars[i].count);
+                        }
+                        else {
+                            m_vars[i].size = __dv_data_length(dvp);
+                        }
+                        m_vars[i].disp = 0;
+                    }
+
+                    // check that length >= 0
+                    if (m_vars[i].alloc_if &&
+                        (m_vars[i].disp + m_vars[i].size < 0)) {
+                        LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
+                        exit(1);
+                    }
+
+                    // base address
+                    void *base = reinterpret_cast<void*>(dvp->Base);
+                    PtrData *ptr_data;
+
+                    // allocate buffer if we have no INTO and don't need
+                    // allocation for the ptr at target
+                    if (src_is_for_mic) {
+                        if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : m_vars[i].disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : m_vars[i].size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf != 0) {
+                                // add buffer to the list of buffers
+                                // that are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            bool error_if_not_found = true;
+                            if (m_is_openmp) {
+                                // For omp target update variable is ignored
+                                // if it does not exist.
+                                if (!m_vars[i].alloc_if &&
+                                    !m_vars[i].free_if) {
+                                    error_if_not_found = false;
+                                }
+                            }
+
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data,
+                                               base,
+                                               m_vars[i].disp,
+                                               m_vars[i].size,
+                                               error_if_not_found)) {
+                                return false;
+                            }
+
+                            if (m_is_openmp) {
+                                // make var nocopy if it does not exist
+                                if (ptr_data == 0) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data != 0) {
+                                // need to update base in dope vector on device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+                        }
+
+                        if (ptr_data != 0) {
+                            if (m_is_openmp) {
+                                // data is transferred only if
+                                // alloc_if == 0 && free_if == 0
+                                // or reference count is 1
+                                if ((m_vars[i].alloc_if ||
+                                     m_vars[i].free_if) &&
+                                    ptr_data->get_reference() != 1) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data->alloc_disp != 0) {
+                                m_vars[i].flags.alloc_disp = 1;
+                                m_in_datalen += sizeof(alloc_disp);
+                            }
+
+                            if (m_vars[i].flags.sink_addr) {
+                                // get buffer's address on the sink
+                                if (!init_mic_address(ptr_data)) {
+                                    return false;
+                                }
+
+                                m_in_datalen += sizeof(ptr_data->mic_addr);
+                            }
+
+                            if (!ptr_data->is_static && m_vars[i].free_if) {
+                                // need to decrement buffer reference on target
+                                m_need_runfunction = true;
+                            }
+
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            m_vars[i].offset =
+                                (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+
+                            // copy other pointer properties to var descriptor
+                            m_vars[i].mic_offset = ptr_data->mic_offset;
+                            m_vars[i].flags.is_static = ptr_data->is_static;
+                        }
+                    }
+                    else { // !src_is_for_mic
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           m_vars[i].disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+                        m_vars[i].offset = !ptr_data ? 0 :
+                                (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+                    }
+
+                    // save pointer data
+                    m_vars_extra[i].src_data = ptr_data;
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+                LIBOFFLOAD_ABORT;
+        }
+        if (m_vars[i].type.src == c_data_ptr_array) {
+            continue;
+        }
+
+        if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
+            m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
+                m_device.m_persist_list.front().cpu_stack_addr;
+        }
+        // if source is used at CPU save its offset and disp
+        if (m_vars[i].into == NULL || m_vars[i].direction.in) {
+            m_vars_extra[i].cpu_offset = m_vars[i].offset;
+            m_vars_extra[i].cpu_disp   = m_vars[i].disp;
+        }
+
+        // If "into" is defined we need to do similar work for it
+        if (!m_vars[i].into) {
+            continue;
+        }
+
+        int64_t into_disp =0, into_offset = 0;
+
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var: {
+                int64_t size = m_vars[i].size;
+
+                if (m_vars[i].type.dst == c_cean_var) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].into);
+
+                    // debug dump
+                    __arr_desc_dump("    ", "INTO", ap, 0);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, into_disp, size);
+
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_dst = 1;
+                        m_vars_extra[i].read_rng_dst =
+                            init_read_ranges_arr_desc(ap);
+                        if (!cean_ranges_match(
+                            m_vars_extra[i].read_rng_src,
+                            m_vars_extra[i].read_rng_dst)) {
+                            LIBOFFLOAD_ERROR(c_ranges_dont_match);
+                            exit(1);
+                        }
+                    }
+                    m_vars[i].into = reinterpret_cast<void*>(ap->base);
+                }
+
+                int64_t size_src = m_vars_extra[i].read_rng_src ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+                    m_vars[i].size;
+                int64_t size_dst = m_vars_extra[i].read_rng_dst ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+                    size;
+                // The "into" size is expected to be no smaller than
+                // the src size
+                if (size_src > size_dst) {
+                    LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+                                     size_src, size_dst);
+                    exit(1);
+                }
+
+                if (m_vars[i].direction.bits) {
+                    if (m_vars[i].flags.is_static_dstn) {
+                        PtrData *ptr_data;
+
+                        // find data associated with variable
+                        if (!find_ptr_data(ptr_data, m_vars[i].into,
+                                           into_disp, size, false)) {
+                            return false;
+                        }
+                        if (ptr_data != 0) {
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            into_offset =
+                                (char*) m_vars[i].into -
+                                (char*) ptr_data->cpu_addr.start();
+                        }
+                        else {
+                            m_vars[i].flags.is_static_dstn = false;
+                        }
+                        m_vars_extra[i].dst_data = ptr_data;
+                    }
+                }
+
+                if (m_vars[i].direction.in &&
+                    !m_vars[i].flags.is_static_dstn) {
+                    m_in_datalen += m_vars[i].size;
+
+                    // for non-static target destination defined as CEAN
+                    // expression we pass to target its size and dist
+                    if (m_vars[i].type.dst == c_cean_var) {
+                        m_in_datalen += 2 * sizeof(uint64_t);
+                    }
+                    m_need_runfunction = true;
+                }
+                break;
+            }
+
+            case c_dv:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
+
+                    // debug dump
+                    __dv_desc_dump("INTO", dvp);
+
+                    // send dope vector contents excluding base
+                    m_in_datalen += m_vars[i].size - sizeof(uint64_t);
+                    m_need_runfunction = true;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr: {
+                int64_t size = m_vars[i].size;
+
+                if (m_vars[i].type.dst == c_cean_var_ptr) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].into);
+
+                    // debug dump
+                    __arr_desc_dump("    ", "INTO", ap, 1);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, into_disp, size);
+
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_dst =
+                            init_read_ranges_arr_desc(ap);
+                        if (!cean_ranges_match(
+                            m_vars_extra[i].read_rng_src,
+                            m_vars_extra[i].read_rng_dst)) {
+                            LIBOFFLOAD_ERROR(c_ranges_dont_match);
+                        }
+                    }
+                    m_vars[i].into = reinterpret_cast<char**>(ap->base);
+                }
+                else if (m_vars[i].type.dst == c_dv_ptr) {
+                    // need to send DV to the device unless it is 'nocopy'
+                    if (m_vars[i].direction.bits ||
+                        m_vars[i].alloc_if ||
+                        m_vars[i].free_if) {
+                        ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
+
+                        // debug dump
+                        __dv_desc_dump("INTO", dvp);
+
+                        m_vars[i].direction.bits = c_parameter_in;
+                    }
+                }
+
+                int64_t size_src = m_vars_extra[i].read_rng_src ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+                    m_vars[i].size;
+                int64_t size_dst = m_vars_extra[i].read_rng_dst ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+                    size;
+                // It's supposed that "into" size must be not less than
+                // src size
+                if (size_src > size_dst) {
+                    LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+                                     size_src, size_dst);
+                    exit(1);
+                }
+
+                if (m_vars[i].direction.bits) {
+                    PtrData *ptr_data;
+
+                    // base address
+                    void *base = *static_cast<void**>(m_vars[i].into);
+
+                    if (m_vars[i].direction.in) {
+                        // allocate buffer
+                        if (m_vars[i].flags.is_stack_buf) {
+                            // for stack persistent objects ptr data is created
+                            // by var_desc with number 0.
+                            // Its ptr_data is stored at m_stack_ptr_data
+                            ptr_data = m_stack_ptr_data;
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+                        else if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : into_disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf != 0) {
+                                // add buffer to the list of buffers that
+                                // are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+                                return false;
+                            }
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+
+                        if (ptr_data->alloc_disp != 0) {
+                            m_vars[i].flags.alloc_disp = 1;
+                            m_in_datalen += sizeof(alloc_disp);
+                        }
+
+                        if (m_vars[i].flags.sink_addr) {
+                            // get buffers's address on the sink
+                            if (!init_mic_address(ptr_data)) {
+                                return false;
+                            }
+
+                            m_in_datalen += sizeof(ptr_data->mic_addr);
+                        }
+
+                        if (!ptr_data->is_static && m_vars[i].free_if) {
+                            // need to decrement buffer reference on target
+                            m_need_runfunction = true;
+                        }
+
+                        // copy other pointer properties to var descriptor
+                        m_vars[i].mic_offset = ptr_data->mic_offset;
+                        m_vars[i].flags.is_static_dstn = ptr_data->is_static;
+                    }
+                    else {
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           into_disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+                    }
+                    if (ptr_data) {
+                        into_offset = ptr_data ?
+                            (char*) base -
+                            (char*) ptr_data->cpu_addr.start() :
+                            0;
+                    }
+                    // save pointer data
+                    m_vars_extra[i].dst_data = ptr_data;
+                }
+                break;
+            }
+
+            case c_func_ptr:
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    const arr_desc *ap;
+                    ArrDesc *dvp;
+                    PtrData *ptr_data;
+                    int64_t disp;
+                    int64_t size;
+
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        ap = static_cast<const arr_desc*>(m_vars[i].into);
+
+                        // debug dump
+                        __arr_desc_dump("    ", "INTO", ap, 0);
+
+                        dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
+                              reinterpret_cast<ArrDesc*>(ap->base) :
+                              *reinterpret_cast<ArrDesc**>(ap->base);
+                    }
+                    else {
+                        dvp = (m_vars[i].type.dst == c_dv_data) ?
+                              static_cast<ArrDesc*>(m_vars[i].into) :
+                              *static_cast<ArrDesc**>(m_vars[i].into);
+                    }
+                    if (!__dv_is_contiguous(dvp)) {
+                        m_vars[i].flags.is_noncont_dst = 1;
+                        m_vars_extra[i].read_rng_dst =
+                            init_read_ranges_dv(dvp);
+                    }
+                    // size and displacement
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        // offset and length are derived from the array
+                        // descriptor
+                        __arr_data_offset_and_length(ap, into_disp, size);
+                        if (m_vars[i].direction.bits) {
+                            if (!is_arr_desc_contiguous(ap)) {
+                                if (m_vars[i].flags.is_noncont_dst) {
+                                    LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
+                                    return false;
+                                }
+                                m_vars[i].flags.is_noncont_dst = 1;
+                                m_vars_extra[i].read_rng_dst =
+                                    init_read_ranges_arr_desc(ap);
+                                if (!cean_ranges_match(
+                                    m_vars_extra[i].read_rng_src,
+                                    m_vars_extra[i].read_rng_dst)) {
+                                    LIBOFFLOAD_ERROR(c_ranges_dont_match);
+                                }
+                            }
+                        }
+                    }
+                    else {
+                        if (m_vars[i].flags.has_length) {
+                            size = __dv_data_length(dvp, m_vars[i].count);
+                        }
+                        else {
+                            size = __dv_data_length(dvp);
+                        }
+                        disp = 0;
+                    }
+
+                    int64_t size_src =
+                        m_vars_extra[i].read_rng_src ?
+                        cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+                        m_vars[i].size;
+                    int64_t size_dst =
+                        m_vars_extra[i].read_rng_dst ?
+                        cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+                        size;
+                    // It's supposed that "into" size must be not less
+                    // than src size
+                    if (size_src > size_dst) {
+                        LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+                            size_src, size_dst);
+                        exit(1);
+                    }
+
+                    // base address
+                    void *base = reinterpret_cast<void*>(dvp->Base);
+
+                    // allocate buffer
+                    if (m_vars[i].direction.in) {
+                        if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : into_disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf !=0) {
+                                // add buffer to the list of buffers
+                                // that are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+                                return false;
+                            }
+
+                            // need to update base in dope vector on device
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+
+                        if (ptr_data->alloc_disp != 0) {
+                            m_vars[i].flags.alloc_disp = 1;
+                            m_in_datalen += sizeof(alloc_disp);
+                        }
+
+                        if (m_vars[i].flags.sink_addr) {
+                            // get buffers's address on the sink
+                            if (!init_mic_address(ptr_data)) {
+                                return false;
+                            }
+                            m_in_datalen += sizeof(ptr_data->mic_addr);
+                        }
+
+                        if (!ptr_data->is_static && m_vars[i].free_if) {
+                            // need to decrement buffer reference on target
+                            m_need_runfunction = true;
+                        }
+
+                        // offset to base from the beginning of the buffer
+                        // memory
+                        into_offset =
+                            (char*) base - (char*) ptr_data->cpu_addr.start();
+
+                        // copy other pointer properties to var descriptor
+                        m_vars[i].mic_offset = ptr_data->mic_offset;
+                        m_vars[i].flags.is_static_dstn = ptr_data->is_static;
+                    }
+                    else { // src_is_for_mic
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           into_disp,
+                                           size,
+                                           false)) {
+                            return false;
+                        }
+                        into_offset = !ptr_data ?
+                            0 :
+                            (char*) base - (char*) ptr_data->cpu_addr.start();
+                    }
+
+                    // save pointer data
+                    m_vars_extra[i].dst_data = ptr_data;
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+                LIBOFFLOAD_ABORT;
+        }
+        // if into is used at CPU save its offset and disp
+        if (m_vars[i].direction.out) {
+            m_vars_extra[i].cpu_offset = into_offset;
+            m_vars_extra[i].cpu_disp   = into_disp;
+        }
+        else {
+            if (m_vars[i].flags.is_stack_buf) {
+                into_offset = static_cast<char*>(m_vars[i].into) -
+                    m_device.m_persist_list.front().cpu_stack_addr;
+            }
+            m_vars[i].offset = into_offset;
+            m_vars[i].disp   = into_disp;
+        }
+    }
+
+    return true;
+}
+
+// Builds the function descriptor ("misc data") that is passed with the run
+// function call and chooses how copyin/copyout data is carried: inline after
+// the descriptor when it is small enough, otherwise through a dedicated COI
+// buffer (m_inout_buf).
+//
+// name - entry name; it is copied into the descriptor's data area.
+//
+// Returns true on success, and also when no run function call is needed (in
+// which case nothing is prepared). Returns false only when a status object is
+// attached (m_status != 0) and either creating the in/out buffer or
+// allocating the descriptor failed; the reason is stored in m_status->result.
+bool OffloadDescriptor::setup_misc_data(const char *name)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
+
+    // we can skip run function call together with wait if offloaded
+    // region is empty and there is no user defined non-pointer IN/OUT data
+    if (m_need_runfunction) {
+        // variable descriptors are sent as input data
+        m_in_datalen += m_vars_total * sizeof(VarDesc);
+
+        // timer data is sent as a part of the output data
+        m_out_datalen += OFFLOAD_TIMER_DATALEN();
+
+        // max from input data and output data length
+        uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
+                                                           m_out_datalen;
+
+        // Misc data has the following layout
+        //     <Function Descriptor>
+        //     <Function Name>
+        //     <In/Out Data>            (optional)
+        //
+        // We can transfer copyin/copyout data in misc/return data which can
+        // be passed to run function call if its size does not exceed
+        // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
+        // buffer for it.
+
+        // descriptor header + name + terminating NUL, rounded up to a
+        // multiple of 8 bytes
+        m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
+        m_func_desc_size = (m_func_desc_size + 7) & ~7;
+
+        int misc_data_offset = 0;
+        int misc_data_size = 0;
+        if (data_len > 0) {
+            if (m_func_desc_size +
+                m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
+                m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
+                // use misc/return data for copyin/copyout
+                misc_data_offset = m_func_desc_size;
+                misc_data_size = data_len;
+            }
+            else {
+                OffloadTimer timer_buf(get_timer_data(),
+                                       c_offload_host_alloc_data_buffer);
+
+                // send/receive data using buffer
+                COIRESULT res = COI::BufferCreate(data_len,
+                                                  COI_BUFFER_NORMAL,
+                                                  0, 0,
+                                                  1, &m_device.get_process(),
+                                                  &m_inout_buf);
+                if (res != COI_SUCCESS) {
+                    if (m_status != 0) {
+                        m_status->result = translate_coi_error(res);
+                        return false;
+                    }
+                    report_coi_error(c_buf_create, res);
+                }
+
+                m_compute_buffers.push_back(m_inout_buf);
+                m_destroy_buffers.push_back(m_inout_buf);
+            }
+        }
+
+        // initialize function descriptor
+        //
+        // calloc rather than malloc: the alignment padding after the name
+        // and the inline in/out area would otherwise hold indeterminate heap
+        // contents until (and unless) they are overwritten later.
+        m_func_desc = (FunctionDescriptor*) calloc(1, m_func_desc_size +
+                                                      misc_data_size);
+        if (m_func_desc == 0) {
+            // report the failure the same way as a failed buffer creation
+            // instead of dereferencing a null descriptor below
+            if (m_status != 0) {
+                m_status->result = OFFLOAD_OUT_OF_MEMORY;
+                return false;
+            }
+            LIBOFFLOAD_ABORT;
+        }
+        m_func_desc->console_enabled = console_enabled;
+        m_func_desc->timer_enabled =
+            timer_enabled || (offload_report_level && offload_report_enabled);
+        m_func_desc->offload_report_level = offload_report_level;
+        m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
+        m_func_desc->in_datalen = m_in_datalen;
+        m_func_desc->out_datalen = m_out_datalen;
+        m_func_desc->vars_num = m_vars_total;
+        m_func_desc->data_offset = misc_data_offset;
+
+        // append entry name; the allocation above reserved strlen(name) + 1
+        // bytes for it
+        strcpy(m_func_desc->data, name);
+    }
+
+    return true;
+}
+
+// Finishes every earlier offload named in the wait list.
+//
+// waits     - array of signal tags to wait for
+// num_waits - number of entries in waits
+//
+// Each tag is looked up on the device; a tag with no matching descriptor is
+// reported (c_offload1) and aborts. Every descriptor found is finished,
+// cleaned up and deleted, even if an earlier one failed.
+//
+// Returns true only if offload_finish() succeeded for all of them.
+bool OffloadDescriptor::wait_dependencies(
+    const void **waits,
+    int num_waits
+)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
+
+    bool all_finished = true;
+    int idx = 0;
+
+    while (idx < num_waits) {
+        const void *tag = waits[idx];
+        ++idx;
+
+        OffloadDescriptor *pending = m_device.find_signal(tag, true);
+        if (pending == 0) {
+            LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
+                             tag);
+            LIBOFFLOAD_ABORT;
+        }
+
+        // always run offload_finish(); only the outcome is accumulated
+        const bool finished = pending->offload_finish();
+        all_finished = all_finished && finished;
+
+        pending->cleanup();
+        delete pending;
+    }
+
+    return all_finished;
+}
+
+// Drives one offload request from start to finish on the host.
+//
+// name       - entry name, forwarded to setup_misc_data()
+// is_empty   - true when the offloaded region is empty; the run function is
+//              then needed only if a later stage sets m_need_runfunction
+// vars, vars2, vars_total, entry_id, stack_addr
+//            - forwarded unchanged to setup_descriptors()
+// waits, num_waits
+//            - earlier offloads that must finish first
+// signal     - when non-null the offload is asynchronous: this descriptor is
+//              registered on the device under *signal and the function
+//              returns without waiting for completion
+//
+// Stages run in order: wait for dependencies, set up descriptors, send
+// pointer data, set up misc data, gather copyin data, compute, receive
+// pointer data. The first stage that fails stops the sequence; cleanup() is
+// called and false is returned. Without a signal the function then waits in
+// offload_finish(), calls cleanup() and returns the result of that wait.
+bool OffloadDescriptor::offload(
+    const char *name,
+    bool is_empty,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total,
+    const void **waits,
+    int num_waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const bool is_async = (signal != 0);
+
+    if (is_async) {
+        OFFLOAD_DEBUG_TRACE_1(1,
+                      GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_init_func,
+                      "Offload function %s, is_empty=%d, #varDescs=%d, "
+                      "#waits=%d, signal=%p\n",
+                      name, is_empty, vars_total, num_waits,
+                      *signal);
+
+        // NOTE(review): `signal` is a pointer but is paired with "%d" here;
+        // confirm how OFFLOAD_REPORT consumes its arguments.
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_signal,
+                      "%d\n", signal);
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE_1(1,
+                      GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_init_func,
+                      "Offload function %s, is_empty=%d, #varDescs=%d, "
+                      "#waits=%d, signal=none\n",
+                      name, is_empty, vars_total, num_waits);
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_sent_pointer_data,
+                      "#Wait : %d \n", num_waits);
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_signal,
+                      "none %d\n", 0);
+    }
+    OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_wait,
+                      "#Wait : %d  %p\n", num_waits, waits);
+
+    // start from a clean status for this device
+    if (m_status != 0) {
+        m_status->result = OFFLOAD_SUCCESS;
+        m_status->device_number = m_device.get_logical_index();
+    }
+
+    m_need_runfunction = !is_empty;
+
+    // Each stage runs only if all previous ones succeeded (short-circuit):
+    //   - wait for dependencies to finish
+    //   - setup buffers
+    //   - initiate send for pointers (as early as possible)
+    //   - setup misc data for run function
+    //   - gather copyin data into buffer
+    //   - start the computation
+    //   - initiate receive for pointers
+    const bool launched =
+        wait_dependencies(waits, num_waits) &&
+        setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr) &&
+        send_pointer_data(is_async) &&
+        setup_misc_data(name) &&
+        gather_copyin_data() &&
+        compute() &&
+        receive_pointer_data(is_async);
+
+    if (!launched) {
+        cleanup();
+        return false;
+    }
+
+    // if there is a signal save descriptor for the later use; completion and
+    // cleanup are left to whoever waits on that signal
+    if (is_async) {
+        m_device.add_signal(*signal, this);
+        return true;
+    }
+
+    // synchronous case: wait for the offload to finish, then clean up
+    // regardless of the outcome
+    const bool finished = offload_finish();
+    cleanup();
+    return finished;
+}
+
+// Completes an offload on the host: waits for the compute events
+// (m_in_deps), scatters the copyout data, waits for the receive events
+// (m_out_deps) and finally destroys the buffers queued in m_destroy_buffers.
+//
+// Error handling follows the m_status convention used throughout: when a
+// status object is attached, a failing COI call stores its translated error
+// in m_status->result and makes the function return false; otherwise the
+// failure is handed to report_coi_error().
+//
+// Returns false if a wait fails (with m_status set), if
+// scatter_copyout_data() fails, or if any buffer could not be destroyed
+// (with m_status set). Returns true otherwise.
+bool OffloadDescriptor::offload_finish()
+{
+    COIRESULT res;
+
+    // wait for compute dependencies to become signaled
+    if (m_in_deps_total > 0) {
+        OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
+
+        if (__offload_active_wait) {
+            // keep CPU busy: poll with a zero timeout until the wait stops
+            // timing out
+            do {
+                res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+            }
+            while (res == COI_TIME_OUT_REACHED);
+        }
+        else {
+            // presumably -1 means "no timeout" - confirm against COI docs
+            res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
+        }
+
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_event_wait, res);
+        }
+    }
+
+    // scatter copyout data received from target
+    if (!scatter_copyout_data()) {
+        return false;
+    }
+    // wait for receive dependencies to become signaled
+    if (m_out_deps_total > 0) {
+        OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
+
+        if (__offload_active_wait) {
+            // keep CPU busy
+            do {
+                res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+            }
+            while (res == COI_TIME_OUT_REACHED);
+        }
+        else {
+            res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
+        }
+
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_event_wait, res);
+        }
+    }
+
+    // destroy buffers
+    //
+    // Every buffer is attempted even after a failure, so one bad buffer does
+    // not leave the rest of the list allocated. Only the first error is kept
+    // in m_status->result.
+    bool all_destroyed = true;
+    {
+        OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
+
+        for (BufferList::const_iterator it = m_destroy_buffers.begin();
+             it != m_destroy_buffers.end(); ++it) {
+            res = COI::BufferDestroy(*it);
+            if (res == COI_SUCCESS) {
+                continue;
+            }
+
+            if (m_status == 0) {
+                // no status object: report the error as before
+                report_coi_error(c_buf_destroy, res);
+                continue;
+            }
+
+            if (all_destroyed) {
+                m_status->result = translate_coi_error(res);
+                all_destroyed = false;
+            }
+        }
+    }
+
+    return all_destroyed;
+}
+
+// Closes out the host-side bookkeeping for this offload. In order, it
+// releases the device in ORSL, stops the total-offload timer and emits the
+// report epilog for this descriptor's timer data.
+//
+// It does not delete the descriptor; callers that own it do that separately.
+void OffloadDescriptor::cleanup()
+{
+    // release device in orsl
+    ORSL::release(m_device.get_logical_index());
+
+    // stop the timer that covers the whole offload
+    OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);
+
+    // report stuff
+    Offload_Report_Epilog(get_timer_data());
+}
+
+// Polls, with a zero timeout, whether this offload has completed.
+//
+// Both the compute events (m_in_deps) and the receive events (m_out_deps)
+// are checked; a set with no events counts as signaled. Both non-empty sets
+// are always polled, even if the first one is not yet signaled.
+//
+// Returns true only if every polled set reported COI_SUCCESS.
+bool OffloadDescriptor::is_signaled()
+{
+    bool compute_done = true;
+    bool receive_done = true;
+
+    // check compute dependencies
+    if (m_in_deps_total > 0) {
+        const COIRESULT in_res =
+            COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+        compute_done = (in_res == COI_SUCCESS);
+    }
+
+    // check receive dependencies
+    if (m_out_deps_total > 0) {
+        const COIRESULT out_res =
+            COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+        receive_done = (out_res == COI_SUCCESS);
+    }
+
+    return compute_done && receive_done;
+}
+
+// Sends pointer data for variable i when the source, the destination, or
+// both of them are noncontiguous. The caller is expected to guarantee that
+// the destination is long enough for the transferred data.
+//
+// i        - index into m_vars / m_vars_extra of the variable to send
+// src_data - pointer data of the source; when it is non-null and has a
+//            cpu_buf the transfer is a buffer-to-buffer copy, otherwise the
+//            data is written directly from host memory
+// dst_data - pointer data of the destination; its mic_buf receives the data.
+//            It is dereferenced unconditionally, so it must not be null.
+// event    - passed unchanged to every COI transfer issued here
+//
+// Returns false if the destination ranges run out before the source ranges
+// do, or if a COI transfer fails while m_status is set (the translated error
+// is stored in m_status->result). Returns true otherwise.
+bool OffloadDescriptor::send_noncontiguous_pointer_data(
+    int i,
+    PtrData* src_data,
+    PtrData* dst_data,
+    COIEVENT *event
+    )
+{
+    int64_t offset_src, offset_dst;
+    int64_t length_src, length_dst;
+    int64_t length_src_cur, length_dst_cur;
+    int64_t send_size, data_sent = 0;
+    COIRESULT res;
+    // both start out true so that the first loop pass initializes the
+    // offsets and the *_cur lengths before they are read
+    bool dst_is_empty = true;
+    bool src_is_empty = true;
+
+    // Set length_src and length_dst: the size of one contiguous piece on
+    // each side (range_size when read ranges exist, otherwise the whole
+    // variable size). Without "into" the destination mirrors the source.
+    length_src = (m_vars_extra[i].read_rng_src) ?
+        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+    length_dst = !m_vars[i].into ? length_src :
+                     (m_vars_extra[i].read_rng_dst) ?
+                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+    // each transfer moves the smaller of the two piece sizes
+    send_size = (length_src < length_dst) ? length_src : length_dst;
+
+    // walk the contiguous ranges one after another,
+    // determine the corresponding destination offset and send data
+    do {
+        if (src_is_empty) {
+            // current source piece is used up (or this is the first pass):
+            // move on to the next one
+            if (m_vars_extra[i].read_rng_src) {
+                if (!get_next_range(m_vars_extra[i].read_rng_src,
+                         &offset_src)) {
+                    // source ranges are over - nothing to send
+                    break;
+                }
+            }
+            else if (data_sent == 0) {
+                // contiguous source, first pass: start at its displacement
+                offset_src = m_vars_extra[i].cpu_disp;
+            }
+            else {
+                // contiguous source already sent completely
+                break;
+            }
+            length_src_cur = length_src;
+        }
+        else {
+            // if source is contiguous or its contiguous range is greater
+            // than destination one
+            offset_src += send_size;
+        }
+        length_src_cur -= send_size;
+        src_is_empty = length_src_cur == 0;
+
+        if (dst_is_empty) {
+            if (m_vars[i].into) {
+                if (m_vars_extra[i].read_rng_dst) {
+                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
+                             &offset_dst)) {
+                        // destination ranges are over
+                        LIBOFFLOAD_ERROR(c_destination_is_over);
+                        return false;
+                    }
+                }
+                // into is contiguous.
+                else {
+                    offset_dst = m_vars[i].disp;
+                }
+                length_dst_cur = length_dst;
+            }
+            // same as source
+            else {
+                offset_dst = offset_src;
+                length_dst_cur = length_src;
+            }
+        }
+        else {
+            // if destination is contiguous or its contiguous range is greater
+            // than source one
+            offset_dst += send_size;
+        }
+        length_dst_cur -= send_size;
+        dst_is_empty = length_dst_cur == 0;
+
+        if (src_data != 0 && src_data->cpu_buf != 0) {
+            // source has a host-side COI buffer: copy buffer to buffer
+            res = COI::BufferCopy(
+                dst_data->mic_buf,
+                src_data->cpu_buf,
+                m_vars[i].mic_offset - dst_data->alloc_disp +
+                m_vars[i].offset + offset_dst,
+                m_vars_extra[i].cpu_offset + offset_src,
+                send_size,
+                COI_COPY_UNSPECIFIED,
+                0, 0,
+                event);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_copy, res);
+            }
+        }
+        else {
+            // no host-side buffer: write straight from host memory
+            char *base = offload_get_src_base(m_vars[i].ptr,
+                m_vars[i].type.src);
+
+            res = COI::BufferWrite(
+                dst_data->mic_buf,
+                m_vars[i].mic_offset - dst_data->alloc_disp +
+                m_vars[i].offset + offset_dst,
+                base + offset_src,
+                send_size,
+                COI_COPY_UNSPECIFIED,
+                0, 0,
+                event);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_write, res);
+            }
+        }
+        // data_sent is only ever compared against zero above, to detect the
+        // first pass for a contiguous source.
+        // NOTE(review): length_src is added here rather than send_size, so
+        // this is not an exact byte count - confirm that is intended.
+        data_sent += length_src;
+    }
+    while (true);
+    return true;
+}
+
+// Initiate the host -> target transfer of pointer data for every variable
+// descriptor whose direction is "in".
+//
+// The destination type (m_vars[i].type.dst) selects how the transfer is set
+// up. For each variable one of three paths is taken:
+//   - source and/or destination is noncontiguous:
+//       send_noncontiguous_pointer_data() issues one transfer per range;
+//   - the source has a host-side COI buffer (src_data->cpu_buf != 0):
+//       COI::BufferCopy from that buffer into the target buffer;
+//   - otherwise:
+//       COI::BufferWrite from the host address into the target buffer.
+//
+// is_async - when true, every transfer is given an event slot from
+//            m_in_deps; when false, only transfers whose size reaches
+//            __offload_use_async_buffer_write get one and the rest pass 0.
+//
+// Returns false if a noncontiguous transfer fails, or if a COI call fails
+// while m_status is set (the translated error is stored in
+// m_status->result). Returns true otherwise. The number of bytes submitted
+// is added to m_status->data_sent (when m_status is set) and reported to the
+// timer/trace facilities.
+bool OffloadDescriptor::send_pointer_data(bool is_async)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
+
+    uint64_t ptr_sent = 0;
+    COIRESULT res;
+
+    // Initiate send for pointer data
+    for (int i = 0; i < m_vars_total; i++) {
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                // only variables with a static destination are sent here
+                if (m_vars[i].direction.in &&
+                    m_vars[i].flags.is_static_dstn) {
+                    // take an event slot for asynchronous or large transfers
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    PtrData* dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    // source descriptor is used only for pointers and
+                    // static scalars
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                           m_vars_extra[i].src_data : 0;
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data != 0 && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].direction.in && m_vars[i].size > 0) {
+                    // take an event slot for asynchronous or large transfers
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    PtrData* dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                            m_vars_extra[i].src_data : 0;
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // propagate a failed noncontiguous transfer instead
+                        // of silently reporting success
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data != 0 && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+                if (m_vars[i].direction.in &&
+                    m_vars[i].size > 0) {
+                    PtrData *ptr_data = m_vars[i].into ?
+                                        m_vars_extra[i].dst_data :
+                                        m_vars_extra[i].src_data;
+                    PtrData* src_data = m_vars_extra[i].src_data;
+
+                    // take an event slot for asynchronous or large transfers
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // propagate a failed noncontiguous transfer instead
+                        // of silently reporting success
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, ptr_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            ptr_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].offset + ptr_data->mic_offset -
+                            ptr_data->alloc_disp +
+                            m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            ptr_data->mic_buf,
+                            ptr_data->mic_offset - ptr_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].direction.in &&
+                    m_vars[i].size > 0) {
+                    PtrData *dst_data = m_vars[i].into ?
+                                        m_vars_extra[i].dst_data :
+                                        m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
+                         VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                            m_vars_extra[i].src_data : 0;
+                    // take an event slot for asynchronous or large transfers
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // propagate a failed noncontiguous transfer instead
+                        // of silently reporting success
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].offset - dst_data->alloc_disp +
+                            dst_data->mic_offset +
+                            m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            dst_data->mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            default:
+                break;
+        }
+
+        // alloc field isn't used at target.
+        // We can reuse it for offset of array pointers.
+        if (m_vars_extra[i].is_arr_ptr_el) {
+            m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
+        }
+    }
+
+    if (m_status) {
+        m_status->data_sent += ptr_sent;
+    }
+
+    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
+    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_sent_pointer_data,
+                  "Total pointer data sent to target: [%lld] bytes\n",
+                  ptr_sent);
+
+    return true;
+}
+
+// Marshal the copy-in data for the offloaded function.
+//
+// When a run function is needed and there is input data, the variable
+// descriptors are copied to the start of the input area, followed by the
+// per-variable data written through the m_in marshaller. The input area is
+// either the mapped m_inout_buf or the region at
+// m_func_desc + m_func_desc->data_offset.
+//
+// Returns false if mapping or unmapping the buffer fails while m_status is
+// set (the translated error is stored in m_status->result); true otherwise.
+bool OffloadDescriptor::gather_copyin_data()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);
+
+    if (m_need_runfunction && m_in_datalen > 0) {
+        COIMAPINSTANCE mapping;
+        char *cursor;
+
+        // locate the start of the input area
+        if (m_inout_buf == 0) {
+            cursor = reinterpret_cast<char*>(m_func_desc) +
+                     m_func_desc->data_offset;
+        }
+        else {
+            OffloadTimer timer_map(get_timer_data(),
+                                   c_offload_host_map_in_data_buffer);
+
+            COIRESULT map_res =
+                COI::BufferMap(m_inout_buf, 0, m_in_datalen,
+                               COI_MAP_WRITE_ENTIRE_BUFFER,
+                               0, 0, 0, &mapping,
+                               reinterpret_cast<void**>(&cursor));
+            if (map_res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_map, map_res);
+                }
+                else {
+                    m_status->result = translate_coi_error(map_res);
+                    return false;
+                }
+            }
+        }
+
+        // variable descriptors go first
+        const size_t descs_size = m_vars_total * sizeof(VarDesc);
+        memcpy(cursor, m_vars, descs_size);
+        cursor += descs_size;
+
+        // the marshaller writes right after the descriptors
+        m_in.init_buffer(cursor, m_in_datalen);
+
+        // Append the copy-in payload of each variable
+        for (int v = 0; v < m_vars_total; ++v) {
+            // choose the descriptor that describes the target side
+            PtrData* target_data;
+            if (m_vars[v].direction.out || m_vars[v].into == NULL) {
+                target_data = m_vars_extra[v].src_data;
+            }
+            else {
+                target_data = m_vars_extra[v].dst_data;
+            }
+
+            if (m_vars[v].flags.alloc_disp) {
+                m_in.send_data(&target_data->alloc_disp,
+                               sizeof(target_data->alloc_disp));
+            }
+
+            // the target needs the sink address
+            if (m_vars[v].flags.sink_addr) {
+                m_in.send_data(&target_data->mic_addr,
+                               sizeof(target_data->mic_addr));
+            }
+
+            switch (m_vars[v].type.dst) {
+                case c_data_ptr_array:
+                    break;
+
+                case c_data:
+                case c_void_ptr:
+                case c_cean_var:
+                    if (m_vars[v].direction.in &&
+                        !m_vars[v].flags.is_static_dstn) {
+                        char *src_base =
+                            offload_get_src_base(m_vars[v].ptr,
+                                                 m_vars[v].type.src);
+
+                        if (m_vars[v].type.dst == c_cean_var) {
+                            // length and offset come from the array
+                            // descriptor and precede the data itself
+                            int64_t cean_size = m_vars[v].size;
+                            int64_t cean_disp = m_vars[v].disp;
+                            m_in.send_data(
+                                reinterpret_cast<char*>(&cean_size),
+                                sizeof(int64_t));
+                            m_in.send_data(
+                                reinterpret_cast<char*>(&cean_disp),
+                                sizeof(int64_t));
+                        }
+
+                        m_in.send_data(src_base + m_vars_extra[v].cpu_disp,
+                                       m_vars[v].size);
+                    }
+                    break;
+
+                case c_dv:
+                    if (m_vars[v].direction.bits ||
+                        m_vars[v].alloc_if ||
+                        m_vars[v].free_if) {
+                        // dope vector is sent without its leading base field
+                        char *dv_bytes = static_cast<char*>(m_vars[v].ptr);
+                        m_in.send_data(dv_bytes + sizeof(uint64_t),
+                                       m_vars[v].size - sizeof(uint64_t));
+                    }
+                    break;
+
+                case c_data_ptr:
+                    // tell the target which obsolete stacks
+                    // have to be released
+                    if (m_vars[v].flags.is_stack_buf &&
+                        !m_vars[v].direction.bits) {
+                        if (m_vars[v].alloc_if && m_vars[v].size != 0) {
+                            PtrDataList::iterator it =
+                                m_destroy_stack.begin();
+                            while (it != m_destroy_stack.end()) {
+                                PtrData *stale = *it;
+                                m_in.send_data(&(stale->mic_addr),
+                                               sizeof(stale->mic_addr));
+                                ++it;
+                            }
+                        }
+                    }
+                    break;
+
+                case c_func_ptr:
+                    if (m_vars[v].direction.in) {
+                        m_in.send_func_ptr(
+                            *static_cast<const void**>(m_vars[v].ptr));
+                    }
+                    break;
+
+                default:
+                    break;
+            }
+        }
+
+        if (m_status) {
+            m_status->data_sent += m_in.get_tfr_size();
+        }
+
+        // a zero data offset means the buffer has to be unmapped
+        if (m_func_desc->data_offset == 0) {
+            OffloadTimer timer_unmap(get_timer_data(),
+                                     c_offload_host_unmap_in_data_buffer);
+            COIRESULT unmap_res = COI::BufferUnmap(mapping, 0, 0, 0);
+            if (unmap_res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_unmap, unmap_res);
+                }
+                else {
+                    m_status->result = translate_coi_error(unmap_res);
+                    return false;
+                }
+            }
+        }
+    }
+
+    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
+    OFFLOAD_DEBUG_TRACE_1(1,
+                  GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
+                  "Total copyin data sent to target: [%lld] bytes\n",
+                  m_in.get_tfr_size());
+
+    return true;
+}
+
+// Dispatch the offloaded function on the device.
+//
+// Does nothing when no run function is needed. Otherwise the function
+// descriptor is passed as misc data; when m_func_desc->data_offset is
+// nonzero the misc length also covers the input data, and the region at that
+// offset is used for the return data if any output is expected. The events
+// collected in m_in_deps are passed as dependencies, and afterwards
+// m_in_deps holds only the event returned by the dispatch.
+//
+// Returns false if the dispatch fails while m_status is set (the translated
+// error is stored in m_status->result); true otherwise.
+bool OffloadDescriptor::compute()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
+
+    if (!m_need_runfunction) {
+        return true;
+    }
+
+    OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
+                          c_offload_compute, "Compute task on MIC\n");
+
+    // data travels together with the function descriptor
+    // when the data offset is nonzero
+    const bool data_in_misc = m_func_desc->data_offset != 0;
+
+    void* misc_data = m_func_desc;
+    int   misc_size = m_func_desc_size;
+    if (data_in_misc) {
+        misc_size += m_in_datalen;
+    }
+
+    void* ret_data = 0;
+    int   ret_size = 0;
+    if (data_in_misc && m_out_datalen > 0) {
+        ret_data = (char*) m_func_desc + m_func_desc->data_offset;
+        ret_size = m_out_datalen;
+    }
+
+    // dispatch task
+    COIEVENT done;
+    COIRESULT res = m_device.compute(m_compute_buffers,
+                                     misc_data, misc_size,
+                                     ret_data, ret_size,
+                                     m_in_deps_total,
+                                     m_in_deps_total > 0 ? m_in_deps : 0,
+                                     &done);
+    if (res != COI_SUCCESS) {
+        if (m_status == 0) {
+            report_coi_error(c_pipeline_run_func, res);
+        }
+        else {
+            m_status->result = translate_coi_error(res);
+            return false;
+        }
+    }
+
+    // the dispatch event replaces the input dependencies
+    m_in_deps[0] = done;
+    m_in_deps_total = 1;
+
+    return true;
+}
+
+// Receive pointer data for variable i if source or destination or both of
+// them are noncontiguous. It is guaranteed that the length of the destination
+// is enough for the transferred data.
+//
+// The source is the target-side buffer m_vars_extra[i].src_data->mic_buf.
+// The transfer is split into chunks of recieve_size bytes, the smaller of
+// the source and destination contiguous range lengths; one COI call is
+// issued per chunk.
+//
+//   i       - index of the variable in m_vars / m_vars_extra
+//   base    - host address the data is read into when dst_buf is 0
+//   dst_buf - host-side COI buffer; when nonzero COI::BufferCopy is used,
+//             otherwise COI::BufferRead into base
+//   event   - passed to every COI call as its completion event
+//
+// Returns false if the destination ranges run out before the source ranges,
+// or if a COI call fails while m_status is set (the translated error is
+// stored in m_status->result). Returns true otherwise.
+bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
+    int i,
+    char* base,
+    COIBUFFER dst_buf,
+    COIEVENT *event
+)
+{
+    int64_t offset_src, offset_dst;
+    int64_t length_src, length_dst;
+    int64_t length_src_cur, length_dst_cur;
+    int64_t recieve_size, data_recieved = 0;
+    COIRESULT res;
+    // both start as "empty" so that the first iteration fetches the first
+    // source and destination offsets before they are used
+    bool dst_is_empty = true;
+    bool src_is_empty = true;
+
+    // Set length_src and length_dst
+    // (range size if a range descriptor exists, whole size otherwise;
+    // without "into" the destination mirrors the source)
+    length_src = (m_vars_extra[i].read_rng_src) ?
+        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+    length_dst = !m_vars[i].into ? length_src :
+                     (m_vars_extra[i].read_rng_dst) ?
+                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+    recieve_size = (length_src < length_dst) ? length_src : length_dst;
+
+    // consecutively get contiguous ranges,
+    // define the corresponding destination offset and receive data
+    do {
+        // get source offset
+        if (src_is_empty) {
+            if (m_vars_extra[i].read_rng_src) {
+                if (!get_next_range(m_vars_extra[i].read_rng_src,
+                         &offset_src)) {
+                    // source ranges are over - nothing more to receive
+                    break;
+                }
+            }
+            else if (data_recieved == 0) {
+                // contiguous source: start of the only range.
+                // NOTE(review): the send path starts a contiguous source at
+                // its displacement (cpu_disp), while here the offset starts
+                // at 0 and m_vars[i].disp is not added below - confirm that
+                // this is intended.
+                offset_src = 0;
+            }
+            else {
+                // contiguous source has been consumed completely
+                break;
+            }
+            length_src_cur = length_src;
+        }
+        else {
+            // if source is contiguous or its contiguous range is greater
+            // than destination one
+            offset_src += recieve_size;
+        }
+        length_src_cur -= recieve_size;
+        src_is_empty = length_src_cur == 0;
+
+        // get destination offset
+        if (dst_is_empty) {
+            if (m_vars[i].into) {
+                if (m_vars_extra[i].read_rng_dst) {
+                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
+                             &offset_dst)) {
+                        // destination ranges are over
+                        LIBOFFLOAD_ERROR(c_destination_is_over);
+                        return false;
+                    }
+                }
+                // destination is contiguous.
+                else {
+                    offset_dst = m_vars_extra[i].cpu_disp;
+                }
+                length_dst_cur = length_dst;
+            }
+            // same as source
+            else {
+                offset_dst = offset_src;
+                length_dst_cur = length_src;
+            }
+        }
+        else {
+            // if destination is contiguous or its contiguous range is greater
+            // than source one
+            offset_dst += recieve_size;
+        }
+        length_dst_cur -= recieve_size;
+        dst_is_empty = length_dst_cur == 0;
+
+        if (dst_buf != 0) {
+            // host side has a COI buffer: buffer-to-buffer copy that waits
+            // for the events in m_in_deps (if any)
+            res = COI::BufferCopy(
+                dst_buf,
+                m_vars_extra[i].src_data->mic_buf,
+                m_vars_extra[i].cpu_offset + offset_dst,
+                m_vars[i].offset + offset_src +
+                m_vars[i].mic_offset -
+                m_vars_extra[i].src_data->alloc_disp,
+                recieve_size,
+                COI_COPY_UNSPECIFIED,
+                m_in_deps_total,
+                m_in_deps_total > 0 ? m_in_deps : 0,
+                event);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_copy, res);
+            }
+        }
+        else {
+            // no host COI buffer: read directly into host memory at base
+            res = COI::BufferRead(
+                m_vars_extra[i].src_data->mic_buf,
+                m_vars[i].offset + offset_src +
+                m_vars[i].mic_offset -
+                m_vars_extra[i].src_data->alloc_disp,
+                base + offset_dst,
+                recieve_size,
+                COI_COPY_UNSPECIFIED,
+                m_in_deps_total,
+                m_in_deps_total > 0 ? m_in_deps : 0,
+                event);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_read, res);
+            }
+        }
+        data_recieved += recieve_size;
+    }
+    while (true);
+    return true;
+}
+
+bool OffloadDescriptor::receive_pointer_data(bool is_async)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
+
+    uint64_t ptr_received = 0;
+    COIRESULT res;
+
+    for (int i = 0; i < m_vars_total; i++) {
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.out &&
+                    m_vars[i].flags.is_static) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_in_deps_total > 0 ||
+                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+                        &m_out_deps[m_out_deps_total++] : 0;
+                    PtrData *ptr_data = NULL;
+                    COIBUFFER dst_buf = NULL; // buffer at host
+                    char *base;
+
+                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+                        ptr_data = m_vars[i].into ?
+                                   m_vars_extra[i].dst_data :
+                                   m_vars_extra[i].src_data;
+                    }
+                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+                        if (m_vars[i].flags.is_static_dstn) {
+                            ptr_data = m_vars[i].into ?
+                                       m_vars_extra[i].dst_data :
+                                       m_vars_extra[i].src_data;
+                        }
+                    }
+                    dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
+                    if (dst_buf == NULL) {
+                        base = offload_get_src_base(
+                            m_vars[i].into ?
+                            static_cast<char*>(m_vars[i].into) :
+                            static_cast<char*>(m_vars[i].ptr),
+                            m_vars[i].type.dst);
+                    }
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        recieve_noncontiguous_pointer_data(
+                            i, base, dst_buf, event);
+                    }
+                    else if (dst_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_buf,
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                       res = COI::BufferRead(
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_read, res);
+                        }
+                    }
+                    ptr_received += m_vars[i].size;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+            case c_dv_ptr: {
+                COIBUFFER dst_buf = NULL; // buffer on host
+                if (m_vars[i].direction.out && m_vars[i].size > 0) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_in_deps_total > 0 ||
+                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+                        &m_out_deps[m_out_deps_total++] : 0;
+
+                    uint64_t dst_offset = 0;
+                    char *base = static_cast<char*>(m_vars[i].ptr);
+
+                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+                        PtrData *ptr_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                        dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
+                        if (dst_buf == NULL) {
+                            base = m_vars[i].into ?
+                                   *static_cast<char**>(m_vars[i].into) :
+                                   *static_cast<char**>(m_vars[i].ptr);
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+                        if (m_vars[i].flags.is_static_dstn) {
+                            dst_buf = m_vars[i].into ?
+                                        m_vars_extra[i].dst_data->cpu_buf :
+                                        m_vars_extra[i].src_data->cpu_buf;
+                        }
+                        if (dst_buf == NULL) {
+                            base = offload_get_src_base(
+                                m_vars[i].into ?
+                                static_cast<char*>(m_vars[i].into) :
+                                static_cast<char*>(m_vars[i].ptr),
+                                m_vars[i].type.dst);
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+                    else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
+                             VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        PtrData *ptr_data = m_vars[i].into != 0 ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                        dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
+                        if (dst_buf == NULL) {
+                            base = offload_get_src_base(
+                                m_vars[i].into ?
+                                static_cast<char*>(m_vars[i].into) :
+                                static_cast<char*>(m_vars[i].ptr),
+                                m_vars[i].type.dst);
+
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        recieve_noncontiguous_pointer_data(
+                            i, base, dst_buf, event);
+                    }
+                    else if (dst_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_buf,
+                            m_vars_extra[i].src_data->mic_buf,
+                            dst_offset,
+                            m_vars[i].offset + m_vars[i].disp +
+                                m_vars[i].mic_offset -
+                                m_vars_extra[i].src_data->alloc_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        res = COI::BufferRead(
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars[i].offset + m_vars[i].disp +
+                                m_vars[i].mic_offset -
+                                m_vars_extra[i].src_data->alloc_disp,
+                            base + dst_offset,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_read, res);
+                        }
+                    }
+                    ptr_received += m_vars[i].size;
+                }
+                break;
+            }
+
+            default:
+                break;
+        }
+
+        // destroy buffers for obsolete stacks
+        if (m_destroy_stack.size() != 0) {
+            for (PtrDataList::iterator it = m_destroy_stack.begin();
+                it != m_destroy_stack.end(); it++) {
+                PtrData *ptr_data = *it;
+                m_destroy_buffers.push_back(ptr_data->mic_buf);
+                OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
+                                  ptr_data->mic_addr);
+            }
+            m_destroy_stack.clear();
+        }
+        if (m_vars[i].free_if) {
+            // remove association for automatic variables
+            if (m_is_openmp && !m_vars[i].flags.is_static &&
+                (m_vars[i].type.src == c_data ||
+                 m_vars[i].type.src == c_void_ptr ||
+                 m_vars[i].type.src == c_cean_var)) {
+                AutoData *auto_data = m_vars_extra[i].auto_data;
+                if (auto_data != 0 && auto_data->remove_reference() == 0) {
+                    m_device.remove_auto_data(auto_data->cpu_addr.start());
+                }
+            }
+
+            // destroy buffers
+            if (m_vars[i].direction.out || m_vars[i].into == NULL) {
+                if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
+                    !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
+                    !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
+                    continue;
+                }
+
+                PtrData *ptr_data = m_vars_extra[i].src_data;
+                if (ptr_data->remove_reference() == 0) {
+                    // destroy buffers
+                    if (ptr_data->cpu_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
+                    }
+                    if (ptr_data->mic_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->mic_buf);
+                    }
+                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                                  ptr_data->cpu_addr.start());
+
+                    // remove association from map
+                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+                }
+            }
+            else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
+                     VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
+                     VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
+                PtrData *ptr_data = m_vars_extra[i].dst_data;
+                if (ptr_data->remove_reference() == 0) {
+                    // destroy buffers
+                    if (ptr_data->cpu_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
+                    }
+                    if (ptr_data->mic_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->mic_buf);
+                    }
+                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                                  ptr_data->cpu_addr.start());
+
+                    // remove association from map
+                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+                }
+            }
+        }
+    }
+
+    if (m_status) {
+        m_status->data_received += ptr_received;
+    }
+
+    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
+    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_received_pointer_data,
+                  "Total pointer data received from target: [%lld] bytes\n",
+                  ptr_received);
+
+    return true;
+}
+
+// Copies "out" values returned through the offload's output data area back
+// into the matching host variables.
+//
+// Work is done only when m_need_runfunction is set and m_out_datalen is
+// positive.  The output area is m_inout_buf (mapped read-only while it is
+// being consumed) when m_func_desc->data_offset is zero; otherwise it is the
+// memory located data_offset bytes past the start of m_func_desc.
+//
+// Returns true on success.  If mapping or unmapping the buffer fails and
+// m_status is set, the translated COI error is stored in m_status->result
+// and false is returned; without m_status the failure is passed to
+// report_coi_error().
+bool OffloadDescriptor::scatter_copyout_data()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);
+
+    if (m_need_runfunction && m_out_datalen > 0) {
+
+        COIMAPINSTANCE mapping;   // used only on the m_inout_buf path
+        COIRESULT      res;
+        char          *out_data;
+
+        // locate the output data
+        if (m_func_desc->data_offset != 0) {
+            out_data = (char*) m_func_desc + m_func_desc->data_offset;
+        }
+        else {
+            OffloadTimer timer_map(get_timer_data(),
+                                   c_offload_host_map_out_data_buffer);
+
+            res = COI::BufferMap(m_inout_buf, 0, m_out_datalen,
+                                 COI_MAP_READ_ONLY, 0, 0, 0,
+                                 &mapping,
+                                 reinterpret_cast<void**>(&out_data));
+            if (res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_map, res);
+                }
+                else {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+            }
+        }
+
+        // the area starts with target timing data; consume it and step
+        // over it to reach the marshalled values
+        OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), out_data);
+        out_data += OFFLOAD_TIMER_DATALEN();
+
+        // initialize output marshaller
+        m_out.init_buffer(out_data, m_out_datalen);
+
+        for (int idx = 0; idx < m_vars_total; ++idx) {
+            switch (m_vars[idx].type.src) {
+                case c_data:
+                case c_void_ptr:
+                case c_cean_var: {
+                    // only non-static "out" variables are read from
+                    // the marshaller here
+                    if (!m_vars[idx].direction.out ||
+                        m_vars[idx].flags.is_static) {
+                        break;
+                    }
+
+                    // destination is the "into" object when one was given,
+                    // the variable itself otherwise
+                    char *dst = m_vars[idx].into ?
+                        offload_get_src_base(
+                            static_cast<char*>(m_vars[idx].into),
+                            m_vars[idx].type.dst) :
+                        static_cast<char*>(m_vars[idx].ptr);
+
+                    m_out.receive_data(dst + m_vars_extra[idx].cpu_disp,
+                                       m_vars[idx].size);
+                    break;
+                }
+
+                case c_func_ptr:
+                    if (m_vars[idx].direction.out) {
+                        m_out.receive_func_ptr((const void**) m_vars[idx].ptr);
+                    }
+                    break;
+
+                case c_data_ptr_array:
+                default:
+                    // nothing to unmarshal for these types
+                    break;
+            }
+        }
+
+        if (m_status) {
+            m_status->data_received += m_out.get_tfr_size();
+        }
+
+        // release the mapping taken above (m_inout_buf path only)
+        if (m_func_desc->data_offset == 0) {
+            OffloadTimer timer_unmap(get_timer_data(),
+                                     c_offload_host_unmap_out_data_buffer);
+
+            res = COI::BufferUnmap(mapping, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status == 0) {
+                    report_coi_error(c_buf_unmap, res);
+                }
+                else {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+            }
+        }
+    }
+
+    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
+    OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
+                  m_out.get_tfr_size());
+
+    return true;
+}
+
+// Derives transfer parameters from the array descriptor "ap".
+//
+//   ap         - array descriptor to inspect
+//   el_size    - size of one element, used to turn byte counts into
+//                element counts
+//   offset     - [out] set only for a contiguous descriptor (filled in by
+//                __arr_data_offset_and_length); left untouched otherwise
+//   size       - [out] contiguous: length reported by
+//                __arr_data_offset_and_length; non-contiguous: range_size
+//                of the generated read ranges
+//   el_number  - [out] number of elements described
+//   ptr_ranges - [out] NULL for a contiguous descriptor, otherwise the
+//                result of init_read_ranges_arr_desc(ap)
+void get_arr_desc_numbers(
+    const arr_desc *ap,
+    int64_t el_size,
+    int64_t &offset,
+    int64_t &size,
+    int     &el_number,
+    CeanReadRanges* &ptr_ranges
+)
+{
+    if (!is_arr_desc_contiguous(ap)) {
+        // non-contiguous section: element count is (elements per range)
+        // times (number of ranges)
+        ptr_ranges = init_read_ranges_arr_desc(ap);
+        el_number = (ptr_ranges->range_size / el_size) *
+                    ptr_ranges->range_max_number;
+        size = ptr_ranges->range_size;
+        return;
+    }
+
+    // contiguous section: a single offset/length pair describes the data
+    ptr_ranges = NULL;
+    __arr_data_offset_and_length(ap, offset, size);
+    el_number = size / el_size;
+}
+
+// Builds a rank-1 array descriptor on the heap.
+//
+//   ptr_val             - stored (as an integer) in the descriptor's base
+//   extent_start_val    - lower bound of the single dimension
+//   extent_elements_val - number of elements; the upper bound becomes
+//                         extent_start_val + extent_elements_val - 1
+//   size                - stored as the dimension's size field
+//
+// lindex is set to 0 and stride to 1.  The descriptor is obtained with
+// malloc() and returned to the caller, which holds the only reference to
+// it.  The process is aborted if the allocation fails.
+arr_desc * make_arr_desc(
+    void*   ptr_val,
+    int64_t extent_start_val,
+    int64_t extent_elements_val,
+    int64_t size
+)
+{
+    arr_desc *res;
+    res = (arr_desc *)malloc(sizeof(arr_desc));
+    if (res == NULL) {
+        // out of memory: stop here rather than write through a null
+        // pointer in the assignments below
+        LIBOFFLOAD_ABORT;
+    }
+    res->base = reinterpret_cast<int64_t>(ptr_val);
+    res->rank = 1;
+    res->dim[0].size = size;
+    res->dim[0].lindex = 0;
+    res->dim[0].lower = extent_start_val;
+    res->dim[0].upper = extent_elements_val + extent_start_val - 1;
+    res->dim[0].stride = 1;
+    return res;
+}
+
+// Expands the pointer-array descriptor m_vars[i] into one new variable
+// descriptor per pointer.
+//
+// m_vars[i].ptr is read as a VarDesc3.  Its array_fields flags say, for each
+// clause component (extent start / elements, alloc_if, free_if, align,
+// into start / elements, alloc start / elements), whether the value is
+// supplied as an array descriptor (one value per pointer), as a scalar, or
+// not at all; the low 6 bits of the flags carry the pointer-array type.
+//
+// The new descriptors are appended to m_vars / m_vars_extra starting at the
+// old value of m_vars_total, and m_in_deps / m_out_deps are grown to match.
+// On success m_vars[i].count holds the number of pointers and
+// m_vars[i].ptr_arr_offset the index of the first new descriptor.
+//
+// Returns false, after reporting c_pointer_array_mismatch, when one of the
+// per-pointer arrays supplies fewer elements than there are pointers.
+// Returns true otherwise.  Aborts if the arrays cannot be grown, or if a
+// per-pointer value cannot be read inside the generation loop.
+bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
+{
+    int             pointers_number;
+    int             tmp_val;
+    int             new_index = m_vars_total;
+    const arr_desc *ap;
+    const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
+    int             flags = vd3->array_fields;
+    bool            src_is_for_mic = (m_vars[i].direction.out ||
+                                      m_vars[i].into == NULL);
+
+    ReadArrElements<void *>  ptr;
+    ReadArrElements<void *>  into;
+    ReadArrElements<int64_t> ext_start;
+    ReadArrElements<int64_t> ext_elements;
+    ReadArrElements<int64_t> align;
+    ReadArrElements<int64_t> alloc_if;
+    ReadArrElements<int64_t> free_if;
+    ReadArrElements<int64_t> into_start;
+    ReadArrElements<int64_t> into_elem;
+    ReadArrElements<int64_t> alloc_start;
+    ReadArrElements<int64_t> alloc_elem;
+
+
+    ap = static_cast<const arr_desc*>(vd3->ptr_array);
+
+    // 1. "pointers_number" is the total number of transferred pointers.
+    // For each of them we create a new var_desc and put it at the bottom
+    // of the var_desc's array
+    get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
+        pointers_number, ptr.ranges);
+    ptr.base = reinterpret_cast<char*>(ap->base);
+
+    // 2. prepare memory for new var_descs
+    m_vars_total += pointers_number;
+    m_vars       = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
+    m_vars_extra =
+        (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
+    m_in_deps    =
+        (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
+    m_out_deps   =
+        (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
+
+    // All requested sizes are non-zero here (m_vars[i] already exists), so
+    // a null result means the allocation failed.  Stop now instead of
+    // writing through a null pointer further down.
+    if (m_vars == NULL || m_vars_extra == NULL ||
+        m_in_deps == NULL || m_out_deps == NULL) {
+        LIBOFFLOAD_ABORT;
+    }
+
+    // 3. Prepare for reading new var_desc's fields.  Every array-valued
+    //    component must provide at least pointers_number elements.
+    //    EXTENT START
+    if ((flags & (1<<flag_extent_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->extent_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
+            ext_start.size, tmp_val, ext_start.ranges);
+        ext_start.base = reinterpret_cast<char*>(ap->base);
+        ext_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
+        ext_start.val = (int64_t)vd3->extent_start;
+    }
+    else {
+        ext_start.val = 0;
+    }
+
+    //    EXTENT ELEMENTS NUMBER
+    if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->extent_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
+            ext_elements.offset, ext_elements.size,
+            tmp_val, ext_elements.ranges);
+        ext_elements.base = reinterpret_cast<char*>(ap->base);
+        ext_elements.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
+        ext_elements.val = (int64_t)vd3->extent_elements;
+    }
+    else {
+        ext_elements.val = m_vars[i].count;
+    }
+
+    //    ALLOC_IF
+    if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
+            alloc_if.size, tmp_val, alloc_if.ranges);
+        alloc_if.base = reinterpret_cast<char*>(ap->base);
+        alloc_if.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
+            return false;
+        }
+    }
+    else {
+        // NOTE(review): the non-array default is read from the "count"
+        // field, not "alloc_if" -- confirm this is the intended source.
+        alloc_if.val = m_vars[i].count;
+    }
+
+    //    FREE_IF
+    if ((flags & (1<<flag_free_if_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->free_if_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
+            free_if.size, tmp_val, free_if.ranges);
+        free_if.base = reinterpret_cast<char*>(ap->base);
+        free_if.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
+            return false;
+        }
+    }
+    else {
+        // NOTE(review): the non-array default is read from the "count"
+        // field, not "free_if" -- confirm this is the intended source.
+        free_if.val = m_vars[i].count;
+    }
+
+    //    ALIGN
+
+    if ((flags & (1<<flag_align_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->align_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
+            align.size, tmp_val, align.ranges);
+        align.base = reinterpret_cast<char*>(ap->base);
+        align.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
+            return false;
+        }
+    }
+    else {
+        align.val = m_vars[i].align;
+    }
+
+    // 3.1 INTO
+
+    if (m_vars[i].into) {
+        ap = static_cast<const arr_desc*>(m_vars[i].into);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
+            into.size, tmp_val, into.ranges);
+        into.base = reinterpret_cast<char*>(ap->base);
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
+            return false;
+        }
+    }
+
+    // 3.2 INTO_START
+
+    if ((flags & (1<<flag_into_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->into_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
+            into_start.size, tmp_val, into_start.ranges);
+        into_start.base = reinterpret_cast<char*>(ap->base);
+        into_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
+        into_start.val = (int64_t)vd3->into_start;
+    }
+    else {
+        into_start.val = 0;
+    }
+
+    // 3.3 INTO_ELEMENTS
+
+    if ((flags & (1<<flag_into_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->into_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
+            into_elem.size, tmp_val, into_elem.ranges);
+        into_elem.base = reinterpret_cast<char*>(ap->base);
+        into_elem.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
+        into_elem.val = (int64_t)vd3->into_elements;
+    }
+    else {
+        into_elem.val = m_vars[i].count;
+    }
+
+    //    alloc_start
+
+    if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
+            alloc_start.offset, alloc_start.size, tmp_val,
+            alloc_start.ranges);
+        alloc_start.base = reinterpret_cast<char*>(ap->base);
+        alloc_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
+        alloc_start.val = (int64_t)vd3->alloc_start;
+    }
+    else {
+        alloc_start.val = 0;
+    }
+
+    //    alloc_elem
+
+    if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
+            alloc_elem.size, tmp_val, alloc_elem.ranges);
+        alloc_elem.base = reinterpret_cast<char*>(ap->base);
+        alloc_elem.el_size = ap->dim[ap->rank - 1].size;
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
+                             "alloc_extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
+        alloc_elem.val = (int64_t)vd3->alloc_elements;
+    }
+    else {
+        alloc_elem.val = 0;
+    }
+
+    // 4. Generate one var_desc per pointer
+    for (int k = 0; k < pointers_number; k++) {
+        int type = flags & 0x3f;
+        int type_src, type_dst;
+        //  Get new values
+        // type_src, type_dst: element type matching the pointer-array type
+        type_src = type_dst = (type == c_data_ptr_array) ?
+                              c_data_ptr   : (type == c_func_ptr_array) ?
+                              c_func_ptr   : (type == c_void_ptr_array) ?
+                              c_void_ptr   : (type == c_string_ptr_array) ?
+                              c_string_ptr : 0;
+
+        // Get ptr val; generation stops early if no further pointer
+        // can be read
+        if (!ptr.read_next(true)) {
+            break;
+        }
+        else {
+            ptr.val = (void*)(ptr.base + ptr.offset);
+        }
+
+        // !!! If we got error at phase of reading - it's an internal
+        // !!! error, as we must detect mismatch before
+
+        // Get into val
+        if (m_vars[i].into) {
+            if (!into.read_next(true)) {
+                LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
+                LIBOFFLOAD_ABORT;
+            }
+            else {
+                into.val = (void*)(into.base + into.offset);
+            }
+        }
+
+        // Get other components of the clause
+        if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!ext_elements.read_next(
+                flags & (1<<flag_extent_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!align.read_next(flags & (1<<flag_align_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_elem.read_next(
+                 flags & (1<<flag_alloc_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+
+        // fields inherited from the pointer-array descriptor or read above
+        m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
+        m_vars[new_index + k].alloc_if = alloc_if.val;
+        m_vars[new_index + k].free_if = free_if.val;
+        m_vars[new_index + k].align = align.val;
+        m_vars[new_index + k].mic_offset = 0;
+        m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
+        m_vars[new_index + k].offset = 0;
+        m_vars[new_index + k].size = m_vars[i].size;
+
+        if (ext_start.val == 0) {
+            // section starts at element 0: plain pointer plus element count
+            m_vars[new_index + k].count = ext_elements.val;
+            m_vars[new_index + k].ptr = ptr.val;
+            if (type_src == c_string_ptr) {
+                m_vars[new_index + k].size = 0;
+            }
+        }
+        else {
+            // non-zero start: describe the section with an array descriptor
+            m_vars[new_index + k].count = 0;
+            m_vars[new_index + k].ptr =
+                static_cast<void*>(make_arr_desc(
+                ptr.val,
+                ext_start.val,
+                ext_elements.val,
+                m_vars[i].size));
+
+            // NOTE(review): the second condition tests the constant
+            // c_string_ptr, not "type_src == c_string_ptr", so for any
+            // non-zero c_string_ptr every type ends up as c_cean_var_ptr
+            // here.  Left as is because "ptr" now refers to an array
+            // descriptor; confirm whether a comparison or an unconditional
+            // c_cean_var_ptr is intended.
+            type_src = type_src == c_data_ptr ? c_cean_var_ptr :
+                                   c_string_ptr ? c_cean_var_ptr :
+                                   type_src;
+            if (!m_vars[i].into) {
+                type_dst = type_src;
+            }
+        }
+
+        if (m_vars[i].into && into_elem.val != 0) {
+            m_vars[new_index + k].into =
+                static_cast<void*>(make_arr_desc(
+                into.val,
+                into_start.val,
+                into_elem.val,
+                m_vars[i].size));
+            type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
+                       (type == c_string_ptr_array) ? c_cean_var_ptr :
+                        type_src;
+        }
+        else {
+            m_vars[new_index + k].into = NULL;
+        }
+
+        if (alloc_elem.val != 0) {
+            m_vars[new_index + k].alloc =
+                static_cast<void*>(make_arr_desc(
+                ptr.val,
+                alloc_start.val,
+                alloc_elem.val,
+                m_vars[i].size));
+        }
+        else {
+            m_vars[new_index + k].alloc = NULL;
+        }
+
+        m_vars[new_index + k].type.src = type_src;
+        m_vars[new_index + k].type.dst = type_dst;
+
+        // remember where in the pointer array this element came from
+        m_vars_extra[new_index + k].is_arr_ptr_el = 1;
+        m_vars_extra[new_index + k].ptr_arr_offset =
+            src_is_for_mic ? ptr.offset : into.offset;
+    }
+    // count and alloc fields are useless at target. They can be reused
+    // for pointer arrays.
+    m_vars[i].count = pointers_number;
+    m_vars[i].ptr_arr_offset = new_index;
+    return true;
+}
+
+// Tears down the global state of the offload library: the engine array,
+// the duplicated path strings, the thread key and, when it is available,
+// the COI library.
+static void __offload_fini_library(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
+
+    // engine-related state is released only when engines were counted
+    const bool have_engines = (mic_engines_total > 0);
+    if (have_engines) {
+        delete[] mic_engines;
+
+        // free() accepts a null pointer, so the strings are released and
+        // reset without a separate null test
+        free(mic_proxy_fs_root);
+        mic_proxy_fs_root = 0;
+
+        free(mic_library_path);
+        mic_library_path = 0;
+
+        // destroy thread key
+        thread_key_delete(mic_thread_key);
+    }
+
+    // unload COI library
+    if (COI::is_available) {
+        COI::fini();
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
+}
+
+static void __offload_init_library_once(void)
+{
+    COIRESULT res;
+    uint32_t num_devices;
+    std::bitset<MIC_ENGINES_MAX> devices;
+
+    prefix = report_get_message_str(c_report_host);
+
+    // initialize trace
+    const char *env_var = getenv(htrace_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            console_enabled = new_val & 0x0f;
+        }
+    }
+
+    env_var = getenv(offload_report_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t env_val;
+        if (__offload_parse_int_string(env_var, env_val)) {
+            if (env_val == OFFLOAD_REPORT_1 ||
+                env_val == OFFLOAD_REPORT_2 ||
+                env_val == OFFLOAD_REPORT_3) {
+                offload_report_level = env_val;
+            }
+            else {
+                LIBOFFLOAD_ERROR(c_invalid_env_report_value,
+                                 offload_report_envname);
+            }
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             offload_report_envname);
+        }
+    }
+    else if (!offload_report_level) {
+        env_var = getenv(timer_envname);
+        if (env_var != 0 && *env_var != '\0') {
+            timer_enabled = atoi(env_var);
+        }
+    }
+
+    // initialize COI
+    if (!COI::init()) {
+        return;
+    }
+
+    // get number of devices installed in the system
+    res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
+    if (res != COI_SUCCESS) {
+        return;
+    }
+
+    if (num_devices > MIC_ENGINES_MAX) {
+        num_devices = MIC_ENGINES_MAX;
+    }
+
+    // fill in the list of devices that can be used for offloading
+    env_var = getenv("OFFLOAD_DEVICES");
+    if (env_var != 0) {
+        if (strcasecmp(env_var, "none") != 0) {
+            // value is composed of comma separated physical device indexes
+            char *buf = strdup(env_var);
+            char *str, *ptr;
+            for (str = strtok_r(buf, ",", &ptr); str != 0;
+                 str = strtok_r(0, ",", &ptr)) {
+                // convert string to an int
+                int64_t num;
+                if (!__offload_parse_int_string(str, num)) {
+                    LIBOFFLOAD_ERROR(c_mic_init5);
+
+                    // fallback to using all installed devices
+                    devices.reset();
+                    for (int i = 0; i < num_devices; i++) {
+                        devices.set(i);
+                    }
+                    break;
+                }
+                if (num < 0 || num >= num_devices) {
+                    LIBOFFLOAD_ERROR(c_mic_init6, num);
+                    continue;
+                }
+                devices.set(num);
+            }
+            free(buf);
+        }
+    }
+    else {
+        // use all available devices
+        for (int i = 0; i < num_devices; i++) {
+            COIENGINE engine;
+            res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
+            if (res == COI_SUCCESS) {
+                devices.set(i);
+            }
+        }
+    }
+
+    mic_engines_total = devices.count();
+
+    // no need to continue if there are no devices to offload to
+    if (mic_engines_total <= 0) {
+        return;
+    }
+
+    // initialize indexes for available devices
+    mic_engines = new Engine[mic_engines_total];
+    for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
+        if (devices[p_idx]) {
+            mic_engines[l_idx].set_indexes(l_idx, p_idx);
+            l_idx++;
+        }
+    }
+
+    // library search path for device binaries
+    env_var = getenv("MIC_LD_LIBRARY_PATH");
+    if (env_var != 0) {
+        mic_library_path = strdup(env_var);
+    }
+
+    // memory size reserved for COI buffers
+    env_var = getenv("MIC_BUFFERSIZE");
+    if (env_var != 0) {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            mic_buffer_size = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
+        }
+    }
+
+    // determine stacksize for the pipeline on the device
+    env_var = getenv("MIC_STACKSIZE");
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size) &&
+            (new_size >= 16384) && ((new_size & 4095) == 0)) {
+            mic_stack_size = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_mic_init3);
+        }
+    }
+
+    // proxy I/O
+    env_var = getenv("MIC_PROXY_IO");
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            mic_proxy_io = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
+        }
+    }
+    env_var = getenv("MIC_PROXY_FS_ROOT");
+    if (env_var != 0 && *env_var != '\0') {
+        mic_proxy_fs_root = strdup(env_var);
+    }
+
+    // Prepare environment for the target process using the following
+    // rules
+    // - If MIC_ENV_PREFIX is set then any environment variable on the
+    //   host which has that prefix are copied to the device without
+    //   the prefix.
+    //   All other host environment variables are ignored.
+    // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
+    //   environment is duplicated.
+    env_var = getenv("MIC_ENV_PREFIX");
+    if (env_var != 0 && *env_var != '\0') {
+        mic_env_vars.set_prefix(env_var);
+
+        int len = strlen(env_var);
+        for (int i = 0; environ[i] != 0; i++) {
+            if (strncmp(environ[i], env_var, len) == 0 &&
+                strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
+                environ[i][len] != '=') {
+                mic_env_vars.analyze_env_var(environ[i]);
+            }
+        }
+    }
+
+    // create key for thread data
+    if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
+        LIBOFFLOAD_ERROR(c_mic_init4, errno);
+        return;
+    }
+
+    // cpu frequency
+    cpu_frequency = COI::PerfGetCycleFrequency();
+
+    env_var = getenv(mic_use_2mb_buffers_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_2mb_buffers = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+                             mic_use_2mb_buffers_envname);
+        }
+    }
+
+    env_var = getenv(mic_use_async_buffer_write_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_async_buffer_write = new_size;
+        }
+    }
+
+    env_var = getenv(mic_use_async_buffer_read_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_async_buffer_read = new_size;
+        }
+    }
+
+    // mic initialization type
+    env_var = getenv(offload_init_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        if (strcmp(env_var, "on_offload") == 0) {
+            __offload_init_type = c_init_on_offload;
+        }
+        else if (strcmp(env_var, "on_offload_all") == 0) {
+            __offload_init_type = c_init_on_offload_all;
+        }
+#ifndef TARGET_WINNT
+        else if (strcmp(env_var, "on_start") == 0) {
+            __offload_init_type = c_init_on_start;
+        }
+#endif // TARGET_WINNT
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
+        }
+    }
+
+    // active wait
+    env_var = getenv(offload_active_wait_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            __offload_active_wait = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             offload_active_wait_envname);
+        }
+    }
+
+    // omp device num
+    env_var = getenv(omp_device_num_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
+            __omp_device_num = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
+                             omp_device_num_envname);
+        }
+    }
+
+    // init ORSL
+    ORSL::init();
+}
+
+// Performs the one-time library initialization (on first call only) and then
+// hands any libraries queued in __target_libs_list over to COI and to every
+// engine. Returns non-zero when offload can be used, i.e. COI is available
+// and at least one engine was found.
+extern int __offload_init_library(void)
+{
+    // one-time initialization; later calls fall straight through
+    static OffloadOnceControl once = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&once, __offload_init_library_once);
+
+    // offload needs both COI and at least one device
+    const bool usable = COI::is_available && (mic_engines_total > 0);
+
+    // nothing more to do when offload is unusable or no library is pending
+    if (!usable || !__target_libs) {
+        return usable;
+    }
+
+    // the pending list is shared with the image registration code
+    mutex_locker_t guard(__target_libs_lock);
+
+    TargetImageList::iterator lib = __target_libs_list.begin();
+    while (lib != __target_libs_list.end()) {
+        // make the library known to COI
+        COI::ProcessRegisterLibraries(1, &lib->data, &lib->size,
+                                      &lib->origin, &lib->offset);
+
+        // and to each engine
+        for (int e = 0; e < mic_engines_total; e++) {
+            mic_engines[e].add_lib(*lib);
+        }
+        ++lib;
+    }
+
+    // the queue has been drained
+    __target_libs = false;
+    __target_libs_list.clear();
+
+    return usable;
+}
+
+// Registers a target image with the offload runtime.
+//
+// target_image points to a packed `struct Image`: a size field followed by
+// the null-terminated binary name and then the binary itself, whose ELF
+// header decides what happens:
+//   ET_EXEC - remembered as the single target executable; the offload
+//             library is initialized and, for init type on_start, every
+//             engine is initialized as well.
+//   ET_DYN  - queued in __target_libs_list for later registration.
+//   other   - reported as an error and the process exits.
+extern "C" void __offload_register_image(const void *target_image)
+{
+    const struct Image *image = static_cast<const struct Image*>(target_image);
+
+    // decode image
+    const char *name = image->data;
+    const void *data = image->data + strlen(image->data) + 1;
+    uint64_t    size = image->size;
+    const char *origin = 0;
+    uint64_t    offset = 0;
+
+    // our actions depend on the image type
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    switch (hdr->e_type) {
+        case ET_EXEC:
+            // Each offload application is supposed to have only one target
+            // image representing target executable.
+            // No thread synchronization is required here as the initialization
+            // code is always executed in a single thread.
+            if (__target_exe != 0) {
+                LIBOFFLOAD_ERROR(c_multiple_target_exes);
+                exit(1);
+            }
+            __target_exe = new TargetImage(name, data, size, origin, offset);
+
+            // Registration code for execs is always called from the context
+            // of main and thus we can safely call any function here,
+            // including LoadLibrary API on windows. This is the place where
+            // we do the offload library initialization.
+            if (__offload_init_library()) {
+                // initialize engine if init_type is on_start
+                if (__offload_init_type == c_init_on_start) {
+                    for (int i = 0; i < mic_engines_total; i++) {
+                        mic_engines[i].init();
+                    }
+                }
+            }
+            break;
+
+        case ET_DYN: {
+            // Registration code for libraries is called from the DllMain
+            // context (on windows) and thus we cannot do anything useful
+            // here. So we just add it to the list of pending libraries for
+            // the later use.
+            //
+            // Use the scoped locker (as __offload_init_library does) so the
+            // lock is released even if push_back throws.
+            mutex_locker_t locker(__target_libs_lock);
+            __target_libs = true;
+            __target_libs_list.push_back(TargetImage(name, data, size,
+                                                     origin, offset));
+            break;
+        }
+
+        default:
+            // something is definitely wrong, issue an error and exit
+            LIBOFFLOAD_ERROR(c_unknown_binary_type);
+            exit(1);
+    }
+}
+
+// Unregisters a target image. Only the target executable (ELF type ET_EXEC)
+// triggers any work: the timer report is printed when timers are enabled,
+// MYO is finalized (when built with MYO_SUPPORT) and the offload library is
+// shut down. Images of any other type are ignored.
+extern "C" void __offload_unregister_image(const void *target_image)
+{
+    // Target image is packed as follows:
+    //      8 bytes                - size of the target binary
+    //      null-terminated string - binary name
+    //      <size> bytes           - binary contents
+    // This is the layout of the shared `struct Image`, which is also what
+    // __offload_register_image decodes; no private copy of the struct is
+    // needed here.
+    const struct Image *image = static_cast<const struct Image*>(target_image);
+
+    // the binary contents start right after the null-terminated name
+    const void *data = image->data + strlen(image->data) + 1;
+
+    // our actions depend on the image type
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    if (hdr->e_type == ET_EXEC) {
+        // We are executing exec's destructors.
+        // It is time to do a library cleanup.
+        if (timer_enabled) {
+            Offload_Timer_Print();
+        }
+
+#ifdef MYO_SUPPORT
+        __offload_myoFini();
+#endif // MYO_SUPPORT
+
+        __offload_fini_library();
+    }
+}
+
+// Runtime trace interface for user programs
+
+// Stores `level` in console_enabled, the runtime's console trace setting.
+void __offload_console_trace(int level)
+{
+    console_enabled = level;
+}
+
+// User-visible offload API
+
+// Returns mic_engines_total, the number of engines selected for offload.
+// The offload library is initialized first (a no-op after the first call).
+int _Offload_number_of_devices(void)
+{
+    __offload_init_library();
+    return mic_engines_total;
+}
+
+// This implementation always returns -1.
+// NOTE(review): presumably -1 means "not executing on a device" -- confirm
+// against the user-facing API documentation.
+int _Offload_get_device_number(void)
+{
+    return -1;
+}
+
+// This implementation always returns -1.
+// NOTE(review): presumably -1 means "not executing on a device" -- confirm
+// against the user-facing API documentation.
+int _Offload_get_physical_device_number(void)
+{
+    return -1;
+}
+
+// Reports whether the asynchronous offload identified by `signal` on the
+// engine selected by `index` (taken modulo the number of engines) has been
+// signaled. An invalid index or an unknown signal is reported as an error
+// followed by LIBOFFLOAD_ABORT.
+int _Offload_signaled(int index, void *signal)
+{
+    __offload_init_library();
+
+    // the index must be non-negative and there must be an engine to map it to
+    const bool index_ok = (index >= 0) && (mic_engines_total > 0);
+    if (!index_ok) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, index);
+        LIBOFFLOAD_ABORT;
+    }
+
+    // look up the async task that was started with this signal
+    Engine &engine = mic_engines[index % mic_engines_total];
+    OffloadDescriptor *desc = engine.find_signal(signal, false);
+    if (desc == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
+        LIBOFFLOAD_ABORT;
+    }
+
+    return desc->is_signaled();
+}
+
+// Switches offload reporting on or off. Only OFFLOAD_REPORT_ON and
+// OFFLOAD_REPORT_OFF are accepted; any other value leaves the current
+// setting untouched.
+void _Offload_report(int val)
+{
+    const bool recognized = (val == OFFLOAD_REPORT_ON) ||
+                            (val == OFFLOAD_REPORT_OFF);
+    if (!recognized) {
+        return;
+    }
+    offload_report_enabled = val;
+}
+
+// IDB support
+//
+// Definitions of the debugger-support symbols. Initial values: not attached
+// (0), no target id (-1), no process id (-1), empty executable name, and
+// debugger support API version 1.0.
+int   __dbg_is_attached = 0;
+int   __dbg_target_id = -1;
+pid_t __dbg_target_so_pid = -1;
+char  __dbg_target_exe_name[MAX_TARGET_NAME] = {0};
+const int __dbg_api_major_version = 1;
+const int __dbg_api_minor_version = 0;
+
+// Both hooks below have empty bodies.
+// NOTE(review): presumably they exist only as locations a debugger can
+// break on -- confirm before removing or inlining them.
+void __dbg_target_so_loaded()
+{
+}
+void __dbg_target_so_unloaded()
+{
+}

Added: openmp/trunk/offload/src/offload_host.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_host.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_host.h (added)
+++ openmp/trunk/offload/src/offload_host.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,343 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The parts of the runtime library used only on the host
+*/
+
+#ifndef OFFLOAD_HOST_H_INCLUDED
+#define OFFLOAD_HOST_H_INCLUDED
+
+#ifndef TARGET_WINNT
+#include <unistd.h>
+#endif // TARGET_WINNT
+#include "offload_common.h"
+#include "offload_util.h"
+#include "offload_engine.h"
+#include "offload_env.h"
+#include "offload_orsl.h"
+#include "coi/coi_client.h"
+
+// MIC engines.
+extern Engine*  mic_engines;
+extern uint32_t mic_engines_total;
+
+//! The target image is packed as follows.
+/*!      1. 8 bytes containing the size of the target binary          */
+/*!      2. a null-terminated string which is the binary name         */
+/*!      3. <size> number of bytes that are the contents of the image */
+/*!      The address of symbol __offload_target_image
+             is the address of this structure.                        */
+// NOTE(review): the layout description above says `size` counts the target
+// binary only, while the field comment below says it counts the name as
+// well -- confirm which is correct and align the two comments.
+struct Image {
+     int64_t size; //!< Size in bytes of the target binary name and contents
+     char data[];  //!< The name and contents of the target image
+};
+
+// The offload descriptor.
+//
+// Carries the state of one offload to one engine: the variable descriptors,
+// the marshallers for inputs and outputs, the COI buffers and dependency
+// events, the optional user status block and the timer data.
+class OffloadDescriptor
+{
+public:
+    // Binds the descriptor to engine (index % mic_engines_total) and puts
+    // every scalar/pointer member into a defined empty state.
+    //
+    // index        - logical device index; reduced modulo mic_engines_total
+    // status       - user-specified status variable
+    // is_mandatory - true if the offload is mandatory
+    // is_openmp    - true if the offload has OpenMP origin
+    // timer_data   - timer data associated with this offload
+    OffloadDescriptor(
+        int index,
+        _Offload_status *status,
+        bool is_mandatory,
+        bool is_openmp,
+        OffloadHostTimerData * timer_data
+    ) :
+        // Initializers are listed in member declaration order, which is the
+        // order C++ runs them in regardless of how they are written.
+        // Members that were previously left indeterminate are now zeroed.
+        m_stack_ptr_data(0),
+        m_device(mic_engines[index % mic_engines_total]),
+        m_is_mandatory(is_mandatory),
+        m_is_openmp(is_openmp),
+        m_vars(0),
+        m_vars_extra(0),
+        m_vars_total(0),
+        m_status(status),
+        m_func_desc(0),
+        m_func_desc_size(0),
+        m_inout_buf(0),
+        m_in_deps(0),
+        m_in_deps_total(0),
+        m_out_deps(0),
+        m_out_deps_total(0),
+        m_timer_data(timer_data),
+        m_in_datalen(0),
+        m_out_datalen(0),
+        m_need_runfunction(false)
+    {}
+
+    // Releases the heap blocks owned by the descriptor. m_vars_extra is
+    // released only together with m_vars.
+    ~OffloadDescriptor()
+    {
+        if (m_in_deps != 0) {
+            free(m_in_deps);
+        }
+        if (m_out_deps != 0) {
+            free(m_out_deps);
+        }
+        if (m_func_desc != 0) {
+            free(m_func_desc);
+        }
+        if (m_vars != 0) {
+            free(m_vars);
+            free(m_vars_extra);
+        }
+    }
+
+    bool offload(const char *name, bool is_empty,
+                 VarDesc *vars, VarDesc2 *vars2, int vars_total,
+                 const void **waits, int num_waits, const void **signal,
+                 int entry_id, const void *stack_addr);
+    bool offload_finish();
+
+    bool is_signaled();
+
+    // Returns the timer data passed to the constructor.
+    OffloadHostTimerData* get_timer_data() const {
+        return m_timer_data;
+    }
+
+private:
+    bool wait_dependencies(const void **waits, int num_waits);
+    bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
+                           int entry_id, const void *stack_addr);
+    bool setup_misc_data(const char *name);
+    bool send_pointer_data(bool is_async);
+    bool send_noncontiguous_pointer_data(
+        int i,
+        PtrData* src_buf,
+        PtrData* dst_buf,
+        COIEVENT *event);
+    // (sic) the misspelled name is kept: the definition lives elsewhere
+    bool recieve_noncontiguous_pointer_data(
+        int i,
+        char* src_data,
+        COIBUFFER dst_buf,
+        COIEVENT *event);
+
+    bool gather_copyin_data();
+
+    bool compute();
+
+    bool receive_pointer_data(bool is_async);
+    bool scatter_copyout_data();
+
+    void cleanup();
+
+    bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
+                       int64_t length, bool error_does_not_exist = true);
+    bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
+                        int64_t length, int64_t alloc_disp, int align);
+    bool init_static_ptr_data(PtrData *ptr_data);
+    bool init_mic_address(PtrData *ptr_data);
+    bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
+                                      int buf_size, int align, bool *is_new);
+    bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
+
+    bool gen_var_descs_for_pointer_array(int i);
+
+    void report_coi_error(error_types msg, COIRESULT res);
+    _Offload_result translate_coi_error(COIRESULT res) const;
+
+private:
+    typedef std::list<COIBUFFER> BufferList;
+
+    // extra data associated with each variable descriptor
+    struct VarExtra {
+        PtrData* src_data;
+        PtrData* dst_data;
+        AutoData* auto_data;
+        int64_t cpu_disp;
+        int64_t cpu_offset;
+        CeanReadRanges *read_rng_src;
+        CeanReadRanges *read_rng_dst;
+        int64_t ptr_arr_offset;
+        bool is_arr_ptr_el;
+    };
+
+    // Cursor that reads successive elements of type T starting at `base`,
+    // either range by range (when `ranges` is set) or as one contiguous run
+    // of `size` bytes. The user fills in `base`, `size` and optionally
+    // `ranges` before the first read_next() call.
+    template<typename T> class ReadArrElements {
+    public:
+        // All members start in a defined state; initializers follow the
+        // member declaration order.
+        ReadArrElements():
+            ranges(NULL),
+            val(),
+            el_size(sizeof(T)),
+            size(0),
+            offset(0),
+            length_cur(0),
+            is_empty(true),
+            count(0),
+            base(NULL)
+        {}
+
+        // Advances to the next element and stores it in `val` when `flag`
+        // is true; does nothing (and returns true) when `flag` is false.
+        // Returns false once the ranges, or the single contiguous run,
+        // have been exhausted.
+        bool read_next(bool flag)
+        {
+            if (flag != 0) {
+                if (is_empty) {
+                    if (ranges) {
+                        if (!get_next_range(ranges, &offset)) {
+                            // ranges are over
+                            return false;
+                        }
+                    }
+                    // all contiguous elements are over
+                    else if (count != 0) {
+                        return false;
+                    }
+
+                    // start a fresh run of `size` bytes
+                    length_cur = size;
+                }
+                else {
+                    offset += el_size;
+                }
+                val = (T)get_el_value(base, offset, el_size);
+                length_cur -= el_size;
+                count++;
+                is_empty = length_cur == 0;
+            }
+            return true;
+        }
+    public:
+        CeanReadRanges * ranges;
+        T       val;
+        int     el_size;
+        int64_t size,
+                offset,
+                length_cur;
+        bool    is_empty;
+        int     count;
+        char   *base;
+    };
+
+    // ptr_data for persistent auto objects
+    PtrData*    m_stack_ptr_data;
+    PtrDataList m_destroy_stack;
+
+    // Engine
+    Engine& m_device;
+
+    // if true offload is mandatory
+    bool m_is_mandatory;
+
+    // if true offload has openmp origin
+    const bool m_is_openmp;
+
+    // The Marshaller for the inputs of the offloaded region.
+    Marshaller m_in;
+
+    // The Marshaller for the outputs of the offloaded region.
+    Marshaller m_out;
+
+    // List of buffers that are passed to dispatch call
+    BufferList m_compute_buffers;
+
+    // List of buffers that need to be destroyed at the end of offload
+    BufferList m_destroy_buffers;
+
+    // Variable descriptors
+    VarDesc*  m_vars;
+    VarExtra* m_vars_extra;
+    int       m_vars_total;
+
+    // Pointer to a user-specified status variable
+    _Offload_status *m_status;
+
+    // Function descriptor
+    FunctionDescriptor* m_func_desc;
+    uint32_t            m_func_desc_size;
+
+    // Buffer for transferring copyin/copyout data
+    COIBUFFER m_inout_buf;
+
+    // Dependencies
+    COIEVENT *m_in_deps;
+    uint32_t  m_in_deps_total;
+    COIEVENT *m_out_deps;
+    uint32_t  m_out_deps_total;
+
+    // Timer data
+    OffloadHostTimerData *m_timer_data;
+
+    // copyin/copyout data length
+    uint64_t m_in_datalen;
+    uint64_t m_out_datalen;
+
+    // a boolean value calculated in setup_descriptors. If true we need to do
+    // a run function on the target. Otherwise it may be optimized away.
+    bool m_need_runfunction;
+};
+
+// Initialization types for MIC
+// Selects when engines get initialized; the value is kept in
+// __offload_init_type.
+enum OffloadInitType {
+    c_init_on_start,         // all devices before entering main
+    c_init_on_offload,       // single device before starting the first offload
+    c_init_on_offload_all    // all devices before starting the first offload
+};
+
+// Initializes library and registers specified offload image.
+extern "C" void __offload_register_image(const void* image);
+extern "C" void __offload_unregister_image(const void* image);
+
+// Initializes offload runtime library.
+extern int __offload_init_library(void);
+
+// thread data for associating pipelines with threads
+extern pthread_key_t mic_thread_key;
+
+// Environment variables for devices
+extern MicEnvVar mic_env_vars;
+
+// CPU frequency
+extern uint64_t cpu_frequency;
+
+// LD_LIBRARY_PATH for MIC libraries
+extern char* mic_library_path;
+
+// stack size for target
+extern uint32_t mic_stack_size;
+
+// Preallocated memory size for buffers on MIC
+extern uint64_t mic_buffer_size;
+
+// Setting controlling inout proxy
+extern bool  mic_proxy_io;
+extern char* mic_proxy_fs_root;
+
+// Threshold for creating buffers with large pages
+extern uint64_t __offload_use_2mb_buffers;
+
+// offload initialization type
+extern OffloadInitType __offload_init_type;
+
+// Device number to offload to when device is not explicitly specified.
+extern int __omp_device_num;
+
+// target executable
+extern TargetImage* __target_exe;
+
+// IDB support
+
+// Called by the offload runtime after initialization of offload infrastructure
+// has been completed.
+extern "C" void  __dbg_target_so_loaded();
+
+// Called by the offload runtime when the offload infrastructure is about to be
+// shut down, currently at application exit.
+extern "C" void  __dbg_target_so_unloaded();
+
+// Null-terminated string containing path to the process image of the hosting
+// application (offload_main)
+#define MAX_TARGET_NAME 512
+extern "C" char  __dbg_target_exe_name[MAX_TARGET_NAME];
+
+// Integer specifying the process id
+extern "C" pid_t __dbg_target_so_pid;
+
+// Integer specifying the 0-based device number
+extern "C" int   __dbg_target_id;
+
+// Set to non-zero by the host-side debugger to enable offload debugging
+// support
+extern "C" int   __dbg_is_attached;
+
+// Major version of the debugger support API
+extern "C" const int __dbg_api_major_version;
+
+// Minor version of the debugger support API
+extern "C" const int __dbg_api_minor_version;
+
+#endif // OFFLOAD_HOST_H_INCLUDED

Added: openmp/trunk/offload/src/offload_myo_host.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_myo_host.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_myo_host.cpp (added)
+++ openmp/trunk/offload/src/offload_myo_host.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,805 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_myo_host.h"
+#include <errno.h>
+#include <malloc.h>
+#include "offload_host.h"
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
+#define MYO_VERSION1    "MYO_1.0"
+
+extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
+extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
+
+#ifndef TARGET_WINNT
+#pragma weak __cilkrts_cilk_for_32
+#pragma weak __cilkrts_cilk_for_64
+#endif // TARGET_WINNT
+
+#ifdef TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
+#else // TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
+#endif // TARGET_WINNT
+
+// Thin wrapper around the MYO client library. LoadLibrary() opens the
+// library and resolves the entry points; every other method traces the call
+// and forwards it to the resolved function. Calls that return a MyoError
+// go through CheckResult(), which reports the error and exits the process
+// on failure.
+class MyoWrapper {
+public:
+    MyoWrapper() : m_lib_handle(0), m_is_available(false)
+    {}
+
+    // True once LoadLibrary() has resolved every entry point.
+    bool is_available() const {
+        return m_is_available;
+    }
+
+    bool LoadLibrary(void);
+
+    // unloads the library
+    // (currently a no-op: the DL_close call below is commented out, so the
+    // handle stays open)
+    void UnloadLibrary(void) {
+//        if (m_lib_handle != 0) {
+//            DL_close(m_lib_handle);
+//            m_lib_handle = 0;
+//        }
+    }
+
+    // Wrappers for MYO client functions
+    void LibInit(void *arg, void *func) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit,
+                                 "%s(%p, %p)\n", __func__, arg, func);
+        CheckResult(__func__, m_lib_init(arg, func));
+    }
+
+    void LibFini(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini, "%s()\n", __func__);
+        m_lib_fini();
+    }
+
+    void* SharedMalloc(size_t size) const {
+        // %lld expects a long long; size_t is not guaranteed to be one
+        // (it is narrower on 32-bit targets), so convert explicitly.
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc,
+                                 "%s(%lld)\n", __func__,
+                                 static_cast<long long>(size));
+        return m_shared_malloc(size);
+    }
+
+    void SharedFree(void *ptr) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree,
+                                 "%s(%p)\n", __func__, ptr);
+        m_shared_free(ptr);
+    }
+
+    void* SharedAlignedMalloc(size_t size, size_t align) const {
+        // same size_t -> long long conversion as in SharedMalloc
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc,
+                                 "%s(%lld, %lld)\n", __func__,
+                                 static_cast<long long>(size),
+                                 static_cast<long long>(align));
+        return m_shared_aligned_malloc(size, align);
+    }
+
+    void SharedAlignedFree(void *ptr) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree,
+                              "%s(%p)\n", __func__, ptr);
+        m_shared_aligned_free(ptr);
+    }
+
+    void Acquire(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire,
+                              "%s()\n", __func__);
+        CheckResult(__func__, m_acquire());
+    }
+
+    void Release(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease,
+                            "%s()\n", __func__);
+        CheckResult(__func__, m_release());
+    }
+
+    void HostVarTablePropagate(void *table, int num_entries) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__, table, num_entries);
+        CheckResult(__func__, m_host_var_table_propagate(table, num_entries));
+    }
+
+    void HostFptrTableRegister(void *table, int num_entries,
+                               int ordered) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister,
+                            "%s(%p, %d, %d)\n", __func__, table,
+                            num_entries, ordered);
+        CheckResult(__func__,
+                    m_host_fptr_table_register(table, num_entries, ordered));
+    }
+
+    void RemoteThunkCall(void *thunk, void *args, int device) {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__, thunk, args,
+                            device);
+        CheckResult(__func__, m_remote_thunk_call(thunk, args, device));
+    }
+
+    MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args,
+                            device);
+        return m_remote_call(func, args, device);
+    }
+
+    void GetResult(MyoiRFuncCallHandle handle) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__, handle);
+        CheckResult(__func__, m_get_result(handle));
+    }
+
+private:
+    // Reports a failed MYO call and terminates the process; returns
+    // normally only when error == MYO_SUCCESS.
+    void CheckResult(const char *func, MyoError error) const {
+        if (error != MYO_SUCCESS) {
+            LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error);
+            exit(1);
+        }
+    }
+
+private:
+    void* m_lib_handle;
+    bool  m_is_available;
+
+    // pointers to functions from myo library
+    MyoError (*m_lib_init)(void*, void*);
+    void     (*m_lib_fini)(void);
+    void*    (*m_shared_malloc)(size_t);
+    void     (*m_shared_free)(void*);
+    void*    (*m_shared_aligned_malloc)(size_t, size_t);
+    void     (*m_shared_aligned_free)(void*);
+    MyoError (*m_acquire)(void);
+    MyoError (*m_release)(void);
+    MyoError (*m_host_var_table_propagate)(void*, int);
+    MyoError (*m_host_fptr_table_register)(void*, int, int);
+    MyoError (*m_remote_thunk_call)(void*, void*, int);
+    MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int);
+    MyoError (*m_get_result)(MyoiRFuncCallHandle);
+};
+
+// Looks up one MYO_VERSION1 entry point in the opened library and stores it
+// in `func`. Traces the missing symbol and returns false when the lookup
+// fails.
+template <typename FuncPtr>
+static bool myo_resolve_symbol(void *lib_handle, const char *sym_name,
+                               FuncPtr &func)
+{
+    func = (FuncPtr) DL_sym(lib_handle, sym_name, MYO_VERSION1);
+    if (func == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            sym_name);
+        return false;
+    }
+    return true;
+}
+
+// Opens the MYO client library and resolves every entry point the wrapper
+// forwards to. Returns true, and marks the wrapper available, only when the
+// library was opened and all symbols were found. Lookup stops at the first
+// missing symbol.
+bool MyoWrapper::LoadLibrary(void)
+{
+#ifndef TARGET_WINNT
+    const char *lib_name = "libmyo-client.so";
+#else // TARGET_WINNT
+    const char *lib_name = "myo-client.dll";
+#endif // TARGET_WINNT
+
+    OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name);
+
+    m_lib_handle = DL_open(lib_name);
+    if (m_lib_handle == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n",
+                            errno);
+        return false;
+    }
+
+    // && short-circuits, so symbols are resolved in this order and the
+    // chain stops (with one trace message) at the first one that is missing
+    const bool all_resolved =
+        myo_resolve_symbol(m_lib_handle, "myoiLibInit", m_lib_init) &&
+        myo_resolve_symbol(m_lib_handle, "myoiLibFini", m_lib_fini) &&
+        myo_resolve_symbol(m_lib_handle, "myoSharedMalloc",
+                           m_shared_malloc) &&
+        myo_resolve_symbol(m_lib_handle, "myoSharedFree", m_shared_free) &&
+        myo_resolve_symbol(m_lib_handle, "myoSharedAlignedMalloc",
+                           m_shared_aligned_malloc) &&
+        myo_resolve_symbol(m_lib_handle, "myoSharedAlignedFree",
+                           m_shared_aligned_free) &&
+        myo_resolve_symbol(m_lib_handle, "myoAcquire", m_acquire) &&
+        myo_resolve_symbol(m_lib_handle, "myoRelease", m_release) &&
+        myo_resolve_symbol(m_lib_handle, "myoiHostVarTablePropagate",
+                           m_host_var_table_propagate) &&
+        myo_resolve_symbol(m_lib_handle, "myoiHostFptrTableRegister",
+                           m_host_fptr_table_register) &&
+        myo_resolve_symbol(m_lib_handle, "myoiRemoteThunkCall",
+                           m_remote_thunk_call) &&
+        myo_resolve_symbol(m_lib_handle, "myoiRemoteCall", m_remote_call) &&
+        myo_resolve_symbol(m_lib_handle, "myoiGetResult", m_get_result);
+
+    if (!all_resolved) {
+        UnloadLibrary();
+        return false;
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
+
+    m_is_available = true;
+
+    return true;
+}
+
+// Set to true at the end of __offload_myoInit_once, after the device-side
+// MYO initialization events have completed successfully.
+static bool myo_is_available;
+// Wrapper object through which the MYO library entry points are called.
+static MyoWrapper myo_wrapper;
+
+// Record for one registered shared-variable table: the address of its first
+// entry and the number of entries that were counted when it was registered.
+struct MyoTable
+{
+    MyoTable(SharedTableEntry *table, int length)
+        : var_tab(table), var_tab_len(length)
+    {
+    }
+
+    // first entry of the table
+    SharedTableEntry*   var_tab;
+    // number of entries recorded for the table
+    int                 var_tab_len;
+};
+
+// Shared variable tables that have been registered but not yet propagated to
+// MYO. The list is modified while holding __myo_table_lock, and __myo_tables
+// is set to true whenever a table is appended and reset when the list is
+// drained.
+typedef std::list<MyoTable> MyoTableList;
+static MyoTableList __myo_table_list;
+static mutex_t      __myo_table_lock;
+static bool         __myo_tables = false;
+
+// Forward declarations of the table registration helpers defined later in
+// this file.
+static void __offload_myo_shared_table_register(SharedTableEntry *entry);
+static void __offload_myo_shared_init_table_register(InitTableEntry* entry);
+static void __offload_myo_fptr_table_register(FptrTableEntry *entry);
+
+// Run-once body: the MYO library is loaded only when the offload library
+// itself reports successful initialization.
+static void __offload_myoLoadLibrary_once(void)
+{
+    if (!__offload_init_library()) {
+        return;
+    }
+
+    myo_wrapper.LoadLibrary();
+}
+
+// Makes sure the one-time MYO library load has been attempted and reports
+// whether the wrapper considers the library available.
+static bool __offload_myoLoadLibrary(void)
+{
+    static OffloadOnceControl once = OFFLOAD_ONCE_CONTROL_INIT;
+
+    __offload_run_once(&once, __offload_myoLoadLibrary_once);
+
+    const bool loaded = myo_wrapper.is_available();
+    return loaded;
+}
+
+// Run-once body of the MYO initialization. Returns without doing anything
+// when the MYO library cannot be loaded. Otherwise it initializes every
+// engine, issues init_myo on each engine, calls LibInit on the host and
+// waits for the per-engine events. The process exits if the wait fails;
+// on success myo_is_available is set.
+static void __offload_myoInit_once(void)
+{
+    // nothing to initialize without the MYO library
+    if (!__offload_myoLoadLibrary()) {
+        return;
+    }
+
+    // initialize all devices
+    for (int i = 0; i < mic_engines_total; i++) {
+        mic_engines[i].init();
+    }
+
+    // load and initialize MYO library
+    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
+
+    // one event per engine, and one parameter record per engine followed by
+    // a terminating record
+    COIEVENT events[MIC_ENGINES_MAX];
+    MyoiUserParams params[MIC_ENGINES_MAX+1];
+
+    // load target library to all devices
+    for (int i = 0; i < mic_engines_total; i++) {
+        mic_engines[i].init_myo(&events[i]);
+
+        // node id passed to MYO is the engine's physical index plus one
+        params[i].type = MYOI_USERPARAMS_DEVID;
+        params[i].nodeid = mic_engines[i].get_physical_index() + 1;
+    }
+
+    // terminate the parameter list
+    params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
+
+    // initialize myo runtime on host
+    myo_wrapper.LibInit(params, 0);
+
+    // wait for the target init calls to finish
+    COIRESULT res;
+    res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
+    if (res != COI_SUCCESS) {
+        LIBOFFLOAD_ERROR(c_event_wait, res);
+        exit(1);
+    }
+
+    // from here on MYO can be used
+    myo_is_available = true;
+
+    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
+}
+
+// Ensures the one-time MYO initialization has run, then hands any pending
+// shared variable tables to MYO. Returns whether MYO is available.
+static bool __offload_myoInit(void)
+{
+    static OffloadOnceControl once = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&once, __offload_myoInit_once);
+
+    // pending tables are only propagated once MYO is up
+    if (myo_is_available && __myo_tables) {
+        mutex_locker_t locker(__myo_table_lock);
+
+        // check the flag again now that the lock is held
+        if (__myo_tables) {
+            MyoTableList::const_iterator table = __myo_table_list.begin();
+
+            while (table != __myo_table_list.end()) {
+#ifdef TARGET_WINNT
+                // propagate entry by entry, leaving out NULL-named entries
+                SharedTableEntry *entry = table->var_tab;
+                while (entry->varName != MYO_TABLE_END_MARKER()) {
+                    if (entry->varName != 0) {
+                        myo_wrapper.HostVarTablePropagate(entry, 1);
+                    }
+                    entry++;
+                }
+#else // TARGET_WINNT
+                // propagate the whole table with a single call
+                myo_wrapper.HostVarTablePropagate(table->var_tab,
+                                                  table->var_tab_len);
+#endif // TARGET_WINNT
+                ++table;
+            }
+
+            // everything has been handed over
+            __myo_table_list.clear();
+            __myo_tables = false;
+        }
+    }
+
+    return myo_is_available;
+}
+
+// Reports whether a shared variable table holds at least one entry before
+// its end marker. On Windows, entries with a NULL name do not count.
+static bool shared_table_entries(
+    SharedTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->varName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->varName == 0) {
+            // step over the NULL-named entry
+            entry++;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        // first countable entry found
+        return true;
+    }
+
+    return false;
+}
+
+// Reports whether a function pointer table holds at least one entry before
+// its end marker. On Windows, entries with a NULL name do not count.
+static bool fptr_table_entries(
+    FptrTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->funcName == 0) {
+            // step over the NULL-named entry
+            entry++;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        // first countable entry found
+        return true;
+    }
+
+    return false;
+}
+
+// Registers a module's MYO tables. Nothing happens, and the MYO library is
+// not loaded, when both the shared variable table and the function pointer
+// table are empty.
+extern "C" void __offload_myoRegisterTables(
+    InitTableEntry* init_table,
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+)
+{
+    // skip all work when neither table has an entry
+    if (!shared_table_entries(shared_table) &&
+        !fptr_table_entries(fptr_table)) {
+        return;
+    }
+
+    // the library has to be loaded before tables are registered
+    __offload_myoLoadLibrary();
+
+    // shared variables first, then function pointers, then initializers
+    __offload_myo_shared_table_register(shared_table);
+    __offload_myo_fptr_table_register(fptr_table);
+    __offload_myo_shared_init_table_register(init_table);
+}
+
+// Shuts MYO down on all engines and on the host. Does nothing when MYO was
+// never initialized; exits the process if waiting for the engines fails.
+void __offload_myoFini(void)
+{
+    if (!myo_is_available) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    COIEVENT events[MIC_ENGINES_MAX];
+
+    // start the finalization call on every engine
+    for (int engine = 0; engine < mic_engines_total; ++engine) {
+        mic_engines[engine].fini_myo(&events[engine]);
+    }
+
+    // finalize the host side of the runtime
+    myo_wrapper.LibFini();
+
+    // block until all engines have finished
+    const COIRESULT res =
+        COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
+    if (res == COI_SUCCESS) {
+        return;
+    }
+
+    LIBOFFLOAD_ERROR(c_event_wait, res);
+    exit(1);
+}
+
+// Walks a shared variable table, calling each entry's sharedAddr as a
+// function, and queues the table for later propagation when at least one
+// entry was processed.
+static void __offload_myo_shared_table_register(
+    SharedTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    SharedTableEntry *const first = entry;
+    int count = 0;
+
+    // create the shared memory behind every variable in the table
+    while (entry->varName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        if (entry->varName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n");
+            ++entry;
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n",
+                            entry->varName, entry);
+
+        // sharedAddr is invoked as a void(void) function
+        reinterpret_cast<void(*)(void)>(entry->sharedAddr)();
+
+        ++count;
+        ++entry;
+    }
+
+    // an empty table is not queued
+    if (count == 0) {
+        return;
+    }
+
+    // add the table to the pending list under the lock
+    mutex_locker_t locker(__myo_table_lock);
+    __myo_table_list.push_back(MyoTable(first, count));
+    __myo_tables = true;
+}
+
+// Calls every initializer function listed in a shared-init table. On
+// Windows the table ends at the end marker and NULL-named entries are
+// skipped; elsewhere it ends at the first entry with a NULL function.
+static void __offload_myo_shared_init_table_register(InitTableEntry* entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+#ifdef TARGET_WINNT
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+        if (entry->funcName != 0) {
+            // run the initializer for the shared memory
+            entry->func();
+        }
+        else {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n");
+        }
+        entry++;
+    }
+#else // TARGET_WINNT
+    while (entry->func != 0) {
+        // run the initializer for the shared memory
+        entry->func();
+        entry++;
+    }
+#endif // TARGET_WINNT
+}
+
+// Walks a function pointer table up to its end marker. While the MYO
+// library is not available, each entry's local thunk slot is filled with
+// the function address directly. When it is available, the entries are
+// registered with MYO: one at a time on Windows (NULL-named entries are
+// skipped there), or as a whole table in a single call on other platforms.
+static void __offload_myo_fptr_table_register(
+    FptrTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    // remember the table start; `entries` counts the processed entries
+    FptrTableEntry *start = entry;
+    int entries = 0;
+
+    for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
+#ifdef TARGET_WINNT
+        if (entry->funcName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n");
+            continue;
+        }
+#endif // TARGET_WINNT
+
+        // without MYO, store the function address in the thunk slot
+        if (!myo_wrapper.is_available()) {
+            *(static_cast<void**>(entry->localThunkAddr)) = entry->funcAddr;
+        }
+
+        OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n",
+                            entry->funcName, entry);
+
+#ifdef TARGET_WINNT
+        // Windows: register each entry individually
+        if (myo_wrapper.is_available()) {
+            myo_wrapper.HostFptrTableRegister(entry, 1, false);
+        }
+#endif // TARGET_WINNT
+
+        entries++;
+    }
+
+#ifndef TARGET_WINNT
+    // other platforms: register the whole table at once
+    if (myo_wrapper.is_available() && entries > 0) {
+        myo_wrapper.HostFptrTableRegister(start, entries, false);
+    }
+#endif // TARGET_WINNT
+}
+
+// Resolves a requested target number to a reserved, initialized device.
+// Returns the device number, or -1 when the request was -2 and no device
+// could be used. Values below -2 and unsatisfied requests other than -2
+// terminate the process with an error.
+extern "C" int __offload_myoIsAvailable(int target_number)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number);
+
+    // anything below -2 is rejected outright
+    if (target_number < -2) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    // for a request of -2, failing to get a device is not an error
+    const bool is_default_number = (target_number == -2);
+
+    if (!__offload_myoInit()) {
+        // fallback to CPU
+        target_number = -1;
+    }
+    else {
+        // a non-negative request is wrapped onto the available engines,
+        // any other request tries device 0
+        const int wanted = (target_number >= 0)
+                               ? target_number % mic_engines_total
+                               : 0;
+
+        // reserve device in ORSL
+        target_number = ORSL::reserve(wanted) ? wanted : -1;
+
+        // make sure device is initialized
+        if (target_number >= 0) {
+            mic_engines[target_number].init();
+        }
+    }
+
+    if (target_number < 0 && !is_default_number) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    return target_number;
+}
+
+// Performs a remote thunk call on the given target: the wrapper's Release
+// is called before the call and Acquire after it, and the ORSL reservation
+// for target_number is released at the end.
+extern "C" void __offload_myoiRemoteIThunkCall(
+    void *thunk,
+    void *arg,
+    int target_number
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__, thunk, arg,
+                        target_number);
+
+    // Release / remote call / Acquire, in this order
+    myo_wrapper.Release();
+    myo_wrapper.RemoteThunkCall(thunk, arg, target_number);
+    myo_wrapper.Acquire();
+
+    // give the device back to ORSL
+    ORSL::release(target_number);
+}
+
+// Allocates `size` bytes through the MYO wrapper when the MYO library can
+// be loaded, and through malloc otherwise. Returns the allocated pointer as
+// produced by the chosen allocator.
+extern "C" void* _Offload_shared_malloc(size_t size)
+{
+    // size_t is not guaranteed to have the type %lld expects, so it is
+    // converted explicitly before being passed through the variadic trace.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__,
+                        static_cast<long long>(size));
+
+    if (__offload_myoLoadLibrary()) {
+        return myo_wrapper.SharedMalloc(size);
+    }
+    else {
+        return malloc(size);
+    }
+}
+
+// Frees `ptr` through the MYO wrapper when the MYO library can be loaded,
+// and through free otherwise.
+extern "C" void _Offload_shared_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        free(ptr);
+        return;
+    }
+
+    myo_wrapper.SharedFree(ptr);
+}
+
+// Allocates `size` bytes aligned to `align` through the MYO wrapper when
+// the MYO library can be loaded. Otherwise _mm_malloc is used, with the
+// alignment raised to at least sizeof(void*).
+extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
+{
+    // size_t is not guaranteed to have the type %lld expects, so both
+    // values are converted explicitly before the variadic trace call.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__,
+                        static_cast<long long>(size),
+                        static_cast<long long>(align));
+
+    if (__offload_myoLoadLibrary()) {
+        return myo_wrapper.SharedAlignedMalloc(size, align);
+    }
+    else {
+        // use pointer size as the smallest alignment
+        if (align < sizeof(void*)) {
+            align = sizeof(void*);
+        }
+        return _mm_malloc(size, align);
+    }
+}
+
+// Frees `ptr` through the MYO wrapper when the MYO library can be loaded,
+// and through _mm_free otherwise.
+extern "C" void _Offload_shared_aligned_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        _mm_free(ptr);
+        return;
+    }
+
+    myo_wrapper.SharedAlignedFree(ptr);
+}
+
+// Runs a cilk_for loop with 32-bit iteration counts on a target when one
+// can be obtained, and on the host through __cilkrts_cilk_for_32 otherwise.
+// For the target path the loop arguments and a copy of the closure are
+// placed in a block obtained from _Offload_shared_malloc.
+extern "C" void __intel_cilk_for_32_offload(
+    int size,
+    void (*copy_constructor)(void*, void*),
+    int target_number,
+    void *raddr,
+    void *closure_object,
+    unsigned int iters,
+    unsigned int grain_size)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    target_number = __offload_myoIsAvailable(target_number);
+
+    // no target: run the loop on the host
+    if (target_number < 0) {
+        __cilkrts_cilk_for_32(raddr,
+                              closure_object,
+                              iters,
+                              grain_size);
+        return;
+    }
+
+    // argument block: three fixed fields followed by the closure bytes
+    struct S {
+        void *M1;
+        unsigned int M2;
+        unsigned int M3;
+        char closure[];
+    };
+
+    // NOTE(review): the allocation result is used without a NULL check
+    S *args = (S*) _Offload_shared_malloc(sizeof(S) + size);
+    args->M3 = grain_size;
+    args->M2 = iters;
+    args->M1 = raddr;
+
+    // copy the closure, using the copy constructor when one is supplied
+    if (copy_constructor != 0) {
+        copy_constructor(args->closure, closure_object);
+    }
+    else {
+        memcpy(args->closure, closure_object, size);
+    }
+
+    // Release / remote call and wait for its result / Acquire
+    myo_wrapper.Release();
+    myo_wrapper.GetResult(
+        myo_wrapper.RemoteCall("__intel_cilk_for_32_offload",
+                               args, target_number)
+    );
+    myo_wrapper.Acquire();
+
+    _Offload_shared_free(args);
+
+    // give the device back to ORSL
+    ORSL::release(target_number);
+}
+
+// Runs a cilk_for loop with 64-bit iteration counts on a target when one
+// can be obtained, and on the host through __cilkrts_cilk_for_64 otherwise.
+// For the target path the loop arguments and a copy of the closure are
+// placed in a block obtained from _Offload_shared_malloc.
+extern "C" void __intel_cilk_for_64_offload(
+    int size,
+    void (*copy_constructor)(void*, void*),
+    int target_number,
+    void *raddr,
+    void *closure_object,
+    uint64_t iters,
+    uint64_t grain_size)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    target_number = __offload_myoIsAvailable(target_number);
+
+    // no target: run the loop on the host
+    if (target_number < 0) {
+        __cilkrts_cilk_for_64(raddr,
+                              closure_object,
+                              iters,
+                              grain_size);
+        return;
+    }
+
+    // argument block: three fixed fields followed by the closure bytes
+    struct S {
+        void *M1;
+        uint64_t M2;
+        uint64_t M3;
+        char closure[];
+    };
+
+    // NOTE(review): the allocation result is used without a NULL check
+    S *args = (S*) _Offload_shared_malloc(sizeof(S) + size);
+    args->M3 = grain_size;
+    args->M2 = iters;
+    args->M1 = raddr;
+
+    // copy the closure, using the copy constructor when one is supplied
+    if (copy_constructor != 0) {
+        copy_constructor(args->closure, closure_object);
+    }
+    else {
+        memcpy(args->closure, closure_object, size);
+    }
+
+    // Release / remote call and wait for its result / Acquire
+    myo_wrapper.Release();
+    myo_wrapper.GetResult(
+        myo_wrapper.RemoteCall("__intel_cilk_for_64_offload", args,
+                               target_number)
+    );
+    myo_wrapper.Acquire();
+
+    _Offload_shared_free(args);
+
+    // give the device back to ORSL
+    ORSL::release(target_number);
+}

Added: openmp/trunk/offload/src/offload_myo_host.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_myo_host.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_myo_host.h (added)
+++ openmp/trunk/offload/src/offload_myo_host.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,80 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_MYO_HOST_H_INCLUDED
+#define OFFLOAD_MYO_HOST_H_INCLUDED
+
+#include <myotypes.h>
+#include <myoimpl.h>
+#include <myo.h>
+#include "offload.h"
+
+// Host-side name for the MYO shared variable table entry type.
+typedef MyoiSharedVarEntry      SharedTableEntry;
+//typedef MyoiHostSharedFptrEntry FptrTableEntry;
+// Function pointer table entry declared locally in place of the
+// commented-out MyoiHostSharedFptrEntry alias above.
+typedef struct {
+    //! Function Name
+    const char *funcName;
+    //! Function Address
+    void *funcAddr;
+    //! Local Thunk Address
+    void *localThunkAddr;
+#ifdef TARGET_WINNT
+    // Dummy to pad up to 32 bytes
+    void *dummy;
+#endif // TARGET_WINNT
+} FptrTableEntry;
+
+// Entry of the shared-init table: a function to call, preceded on Windows
+// by a name field.
+struct InitTableEntry {
+#ifdef TARGET_WINNT
+    // Function name; on Windows this field also pads the entry up to
+    // 16 bytes
+    const char *funcName;
+#endif // TARGET_WINNT
+    // Initializer function
+    void (*func)(void);
+};
+
+// Names of the sections that hold the MYO shared variable, shared-init and
+// function pointer tables. On Windows the start and end names differ by
+// their $a / $z suffix; on other platforms the start and end names are
+// identical.
+#ifdef TARGET_WINNT
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START          ".MyoSharedTable$a"
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END            ".MyoSharedTable$z"
+
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START     ".MyoSharedInitTable$a"
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END       ".MyoSharedInitTable$z"
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START            ".MyoFptrTable$a"
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END              ".MyoFptrTable$z"
+#else  // TARGET_WINNT
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START          ".MyoSharedTable."
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END            ".MyoSharedTable."
+
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START     ".MyoSharedInitTable."
+#define OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END       ".MyoSharedInitTable."
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START            ".MyoFptrTable."
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END              ".MyoFptrTable."
+#endif // TARGET_WINNT
+
+// Declare every table section with read and write attributes.
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+
+// Host entry point that registers a module's shared-init, shared variable
+// and function pointer tables.
+extern "C" void __offload_myoRegisterTables(
+    InitTableEntry *init_table,
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+);
+
+// Host-side MYO finalization routine.
+extern void __offload_myoFini(void);
+
+#endif // OFFLOAD_MYO_HOST_H_INCLUDED

Added: openmp/trunk/offload/src/offload_myo_target.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_myo_target.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_myo_target.cpp (added)
+++ openmp/trunk/offload/src/offload_myo_target.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,184 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_myo_target.h"
+#include "offload_target.h"
+
+// Cilk runtime loop entry points used by the offload wrappers in this file.
+extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
+extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
+
+// Both symbols are declared weak.
+#pragma weak __cilkrts_cilk_for_32
+#pragma weak __cilkrts_cilk_for_64
+
+// Reports a failed MYO call and terminates the process; returns normally
+// when `error` is MYO_SUCCESS. `func` is the name passed to the report.
+static void CheckResult(const char *func, MyoError error) {
+    if (error == MYO_SUCCESS) {
+        return;
+    }
+
+    LIBOFFLOAD_ERROR(c_myotarget_checkresult, func, error);
+    exit(1);
+}
+
+// Counts the entries of a shared variable table (up to the first NULL
+// name), tracing each one, and registers a non-empty table through
+// myoiMicVarTableRegister.
+static void __offload_myo_shared_table_register(SharedTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    int count = 0;
+    for (SharedTableEntry *cur = entry; cur->varName != 0; ++cur) {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared,
+                              "myo shared entry name = \"%s\" addr = %p\n",
+                              cur->varName, cur->sharedAddr);
+        ++count;
+    }
+
+    // nothing to register for an empty table
+    if (count == 0) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry,
+                        count);
+    CheckResult("myoiMicVarTableRegister",
+                myoiMicVarTableRegister(entry, count));
+}
+
+// Counts the entries of a function pointer table (up to the first NULL
+// name), tracing each one, and registers a non-empty table through
+// myoiTargetFptrTableRegister.
+static void __offload_myo_fptr_table_register(
+    FptrTableEntry *entry
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    int count = 0;
+    for (FptrTableEntry *cur = entry; cur->funcName != 0; ++cur) {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_fptr,
+                              "myo fptr entry name = \"%s\" addr = %p\n",
+                              cur->funcName, cur->funcAddr);
+        ++count;
+    }
+
+    // nothing to register for an empty table
+    if (count == 0) {
+        return;
+    }
+
+    OFFLOAD_DEBUG_TRACE(3, "myoiTargetFptrTableRegister(%p, %d, 0)\n",
+                        entry, count);
+    CheckResult("myoiTargetFptrTableRegister",
+                myoiTargetFptrTableRegister(entry, count, 0));
+}
+
+// Calls myoAcquire and terminates the process if it does not succeed.
+extern "C" void __offload_myoAcquire(void)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    MyoError status = myoAcquire();
+    CheckResult("myoAcquire", status);
+}
+
+// Calls myoRelease and terminates the process if it does not succeed.
+extern "C" void __offload_myoRelease(void)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    MyoError status = myoRelease();
+    CheckResult("myoRelease", status);
+}
+
+// Target-side entry for the 32-bit cilk_for offload: reads the loop
+// arguments from the block pointed to by args_ and forwards them to
+// __cilkrts_cilk_for_32.
+extern "C" void __intel_cilk_for_32_offload_wrapper(void *args_)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // three fixed fields followed by the closure bytes
+    struct S {
+        void *M1;
+        unsigned int M2;
+        unsigned int M3;
+        char closure[];
+    };
+
+    S *args = (S*) args_;
+
+    __cilkrts_cilk_for_32(args->M1, args->closure, args->M2, args->M3);
+}
+
+// Target-side entry for the 64-bit cilk_for offload: reads the loop
+// arguments from the block pointed to by args_ and forwards them to
+// __cilkrts_cilk_for_64.
+extern "C" void __intel_cilk_for_64_offload_wrapper(void *args_)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // three fixed fields followed by the closure bytes
+    struct S {
+        void *M1;
+        uint64_t M2;
+        uint64_t M3;
+        char closure[];
+    };
+
+    S *args = (S*) args_;
+
+    __cilkrts_cilk_for_64(args->M1, args->closure, args->M2, args->M3);
+}
+
+// Registers the two cilk_for offload wrappers with MYO under the names
+// "__intel_cilk_for_32_offload" and "__intel_cilk_for_64_offload".
+static void __offload_myo_once_init(void)
+{
+    MyoError status;
+
+    // 32-bit wrapper
+    status = myoiRemoteFuncRegister(
+        (MyoiRemoteFuncType) __intel_cilk_for_32_offload_wrapper,
+        "__intel_cilk_for_32_offload");
+    CheckResult("myoiRemoteFuncRegister", status);
+
+    // 64-bit wrapper
+    status = myoiRemoteFuncRegister(
+        (MyoiRemoteFuncType) __intel_cilk_for_64_offload_wrapper,
+        "__intel_cilk_for_64_offload");
+    CheckResult("myoiRemoteFuncRegister", status);
+}
+
+// Target entry point for table registration. The Cilk wrappers are
+// registered once per process; the module's tables are registered unless
+// both of them start with a NULL name.
+extern "C" void __offload_myoRegisterTables(
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    // one time registration of Intel(R) Cilk(TM) language entries
+    static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+    pthread_once(&once_control, __offload_myo_once_init);
+
+    // register module's tables when at least one of them is not empty
+    if (shared_table->varName != 0 || fptr_table->funcName != 0) {
+        __offload_myo_shared_table_register(shared_table);
+        __offload_myo_fptr_table_register(fptr_table);
+    }
+}
+
+// Target-side shared allocation: returns the result of myoSharedMalloc for
+// `size` bytes.
+extern "C" void* _Offload_shared_malloc(size_t size)
+{
+    // size_t is not guaranteed to have the type %lld expects, so it is
+    // converted explicitly before being passed through the variadic trace.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__,
+                        static_cast<long long>(size));
+    return myoSharedMalloc(size);
+}
+
+// Target-side shared free: passes `ptr` to myoSharedFree.
+extern "C" void _Offload_shared_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+    myoSharedFree(ptr);
+}
+
+// Target-side aligned shared allocation: returns the result of
+// myoSharedAlignedMalloc for `size` bytes with alignment `align`.
+extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
+{
+    // size_t is not guaranteed to have the type %lld expects, so both
+    // values are converted explicitly before the variadic trace call.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__,
+                        static_cast<long long>(size),
+                        static_cast<long long>(align));
+    return myoSharedAlignedMalloc(size, align);
+}
+
+// Target-side aligned shared free: passes `ptr` to myoSharedAlignedFree.
+extern "C" void _Offload_shared_aligned_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+    myoSharedAlignedFree(ptr);
+}
+
+// temporary workaround for blocking behavior of myoiLibInit/Fini calls
+//
+// Calls myoiLibInit(0, 0) and terminates the process if it does not succeed.
+extern "C" void __offload_myoLibInit()
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
+
+    MyoError status = myoiLibInit(0, 0);
+    CheckResult("myoiLibInit", status);
+}
+
+// Counterpart of __offload_myoLibInit: calls myoiLibFini. No result is
+// checked here.
+extern "C" void __offload_myoLibFini()
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
+    myoiLibFini();
+}

Added: openmp/trunk/offload/src/offload_myo_target.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_myo_target.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_myo_target.h (added)
+++ openmp/trunk/offload/src/offload_myo_target.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_MYO_TARGET_H_INCLUDED
+#define OFFLOAD_MYO_TARGET_H_INCLUDED
+
+#include <myotypes.h>
+#include <myoimpl.h>
+#include <myo.h>
+#include "offload.h"
+
+// Target-side names for the MYO shared variable and function pointer table
+// entry types.
+typedef MyoiSharedVarEntry          SharedTableEntry;
+typedef MyoiTargetSharedFptrEntry   FptrTableEntry;
+
+// Names of the sections that hold the MYO shared variable and function
+// pointer tables. On Windows the start and end names differ by their
+// $a / $z suffix; on other platforms the start and end names are identical.
+#ifdef TARGET_WINNT
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START          ".MyoSharedTable$a"
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END            ".MyoSharedTable$z"
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START            ".MyoFptrTable$a"
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END              ".MyoFptrTable$z"
+#else  // TARGET_WINNT
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_START          ".MyoSharedTable."
+#define OFFLOAD_MYO_SHARED_TABLE_SECTION_END            ".MyoSharedTable."
+
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_START            ".MyoFptrTable."
+#define OFFLOAD_MYO_FPTR_TABLE_SECTION_END              ".MyoFptrTable."
+#endif // TARGET_WINNT
+
+// Declare every table section with read and write attributes.
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_SHARED_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_MYO_FPTR_TABLE_SECTION_END, read, write)
+
+// Target entry point that registers a module's shared variable and function
+// pointer tables.
+extern "C" void __offload_myoRegisterTables(
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+);
+
+// Target-side acquire / release entry points.
+extern "C" void __offload_myoAcquire(void);
+extern "C" void __offload_myoRelease(void);
+
+// temporary workaround for blocking behavior for myoiLibInit/Fini calls
+extern "C" void __offload_myoLibInit();
+extern "C" void __offload_myoLibFini();
+
+#endif // OFFLOAD_MYO_TARGET_H_INCLUDED

Added: openmp/trunk/offload/src/offload_omp_host.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_omp_host.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_omp_host.cpp (added)
+++ openmp/trunk/offload/src/offload_omp_host.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,851 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <omp.h>
+#include "offload.h"
+#include "compiler_if_host.h"
+
+// OpenMP API
+
+// Stores `num` as the default device number; negative values are ignored.
+void omp_set_default_device(int num)
+{
+    if (num < 0) {
+        return;
+    }
+
+    __omp_device_num = num;
+}
+
+// Returns the current value of __omp_device_num.
+int omp_get_default_device(void)
+{
+    return __omp_device_num;
+}
+
+// Returns mic_engines_total after calling __offload_init_library().
+int omp_get_num_devices()
+{
+    // the result of the initialization call itself is not examined here
+    __offload_init_library();
+    return mic_engines_total;
+}
+
+// OpenMP API wrappers
+
+// Offloads the call named f_name to the given target, passing `setting` as
+// its single input parameter. Nothing is done when the target cannot be
+// acquired.
+static void omp_set_int_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int setting,
+    const char* f_name
+)
+{
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
+                                          f_name, 0);
+    if (!ofld) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &desc = vars[0];
+
+    // one int, copied in
+    desc.ptr = &setting;
+    desc.count = 1;
+    desc.size = sizeof(int);
+    desc.direction.bits = c_parameter_in;
+    desc.type.dst = c_data;
+    desc.type.src = c_data;
+
+    OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Offloads the call named f_name to the given target and returns the int it
+// writes back through its single output parameter. Returns 0 when the
+// target cannot be acquired.
+static int omp_get_int_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    const char * f_name
+)
+{
+    int setting = 0;
+
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
+                                          f_name, 0);
+    if (!ofld) {
+        return setting;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &desc = vars[0];
+
+    // one int, copied out
+    desc.ptr = &setting;
+    desc.count = 1;
+    desc.size = sizeof(int);
+    desc.direction.bits = c_parameter_out;
+    desc.type.dst = c_data;
+    desc.type.src = c_data;
+
+    OFFLOAD_OFFLOAD(ofld, f_name, 0, 1, vars, NULL, 0, 0, 0);
+
+    return setting;
+}
+
+// Forwards num_threads to the target under the offload name
+// "omp_set_num_threads_target".
+void omp_set_num_threads_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+)
+{
+    const char *const entry_name = "omp_set_num_threads_target";
+
+    omp_set_int_target(target_type, target_number, num_threads, entry_name);
+}
+
+// Returns the int obtained from the target under the offload name
+// "omp_get_max_threads_target".
+int omp_get_max_threads_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const int value = omp_get_int_target(target_type, target_number,
+                                         "omp_get_max_threads_target");
+    return value;
+}
+
+// Returns the int obtained from the target under the offload name
+// "omp_get_num_procs_target".
+int omp_get_num_procs_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const int value = omp_get_int_target(target_type, target_number,
+                                         "omp_get_num_procs_target");
+    return value;
+}
+
+// Forwards the given int to the target under the offload name
+// "omp_set_dynamic_target".
+void omp_set_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+)
+{
+    const char *const entry_name = "omp_set_dynamic_target";
+
+    omp_set_int_target(target_type, target_number, num_threads, entry_name);
+}
+
+// Returns the int obtained from the target under the offload name
+// "omp_get_dynamic_target".
+int omp_get_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const int value = omp_get_int_target(target_type, target_number,
+                                         "omp_get_dynamic_target");
+    return value;
+}
+
+// Forwards `nested` to the target under the offload name
+// "omp_set_nested_target".
+void omp_set_nested_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int nested
+)
+{
+    const char *const entry_name = "omp_set_nested_target";
+
+    omp_set_int_target(target_type, target_number, nested, entry_name);
+}
+
+// Returns the int obtained from the target under the offload name
+// "omp_get_nested_target".
+int omp_get_nested_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    const int value = omp_get_int_target(target_type, target_number,
+                                         "omp_get_nested_target");
+    return value;
+}
+
+// Offloads "omp_set_schedule_target" to the given target with `kind` and
+// `modifier` as input parameters. Nothing is done when the target cannot be
+// acquired.
+void omp_set_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t kind,
+    int modifier
+)
+{
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
+                                          __func__, 0);
+    if (ofld == 0) {
+        return;
+    }
+
+    VarDesc vars[2] = {0};
+
+    // first parameter: the schedule kind, copied in
+    VarDesc &kind_desc = vars[0];
+    kind_desc.ptr = &kind;
+    kind_desc.count = 1;
+    kind_desc.size = sizeof(omp_sched_t);
+    kind_desc.direction.bits = c_parameter_in;
+    kind_desc.type.dst = c_data;
+    kind_desc.type.src = c_data;
+
+    // second parameter: the modifier, copied in
+    VarDesc &modifier_desc = vars[1];
+    modifier_desc.ptr = &modifier;
+    modifier_desc.count = 1;
+    modifier_desc.size = sizeof(int);
+    modifier_desc.direction.bits = c_parameter_in;
+    modifier_desc.type.dst = c_data;
+    modifier_desc.type.src = c_data;
+
+    OFFLOAD_OFFLOAD(ofld, "omp_set_schedule_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+}
+
+// Offloads "omp_get_schedule_target" to the given target with `kind` and
+// `modifier` as output parameters. Nothing is done when the target cannot
+// be acquired.
+void omp_get_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t *kind,
+    int *modifier
+)
+{
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
+                                          __func__, 0);
+    if (ofld == 0) {
+        return;
+    }
+
+    VarDesc vars[2] = {0};
+
+    // first parameter: the schedule kind, copied out
+    VarDesc &kind_desc = vars[0];
+    kind_desc.ptr = kind;
+    kind_desc.count = 1;
+    kind_desc.size = sizeof(omp_sched_t);
+    kind_desc.direction.bits = c_parameter_out;
+    kind_desc.type.dst = c_data;
+    kind_desc.type.src = c_data;
+
+    // second parameter: the modifier, copied out
+    VarDesc &modifier_desc = vars[1];
+    modifier_desc.ptr = modifier;
+    modifier_desc.count = 1;
+    modifier_desc.size = sizeof(int);
+    modifier_desc.direction.bits = c_parameter_out;
+    modifier_desc.type.dst = c_data;
+    modifier_desc.type.src = c_data;
+
+    OFFLOAD_OFFLOAD(ofld, "omp_get_schedule_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+}
+
+// lock API functions
+
+// Offloads "omp_init_lock_target" to the given target with `lock` as an
+// output parameter. Nothing is done when the target cannot be acquired.
+void omp_init_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(target_type, target_number, 0, NULL,
+                                          __func__, 0);
+    if (ofld == 0) {
+        return;
+    }
+
+    VarDesc vars[1] = {0};
+    VarDesc &desc = vars[0];
+
+    // the lock object, copied out
+    desc.ptr = lock;
+    desc.count = 1;
+    desc.size = sizeof(omp_lock_target_t);
+    desc.direction.bits = c_parameter_out;
+    desc.type.dst = c_data;
+    desc.type.src = c_data;
+
+    OFFLOAD_OFFLOAD(ofld, "omp_init_lock_target",
+                    0, 1, vars, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_destroy_lock_target" entry on a
+// target. `lock` is described as a single omp_lock_target_t with direction
+// c_parameter_in. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_destroy_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_lock_target_t);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_destroy_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_set_lock_target" entry on a target.
+// `lock` is described as a single omp_lock_target_t with direction
+// c_parameter_inout. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_set_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_lock_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_set_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_unset_lock_target" entry on a target.
+// `lock` is described as a single omp_lock_target_t with direction
+// c_parameter_inout. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_unset_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_lock_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_unset_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_test_lock_target" entry on a target.
+// Passes `lock` (c_parameter_inout) and a local int result
+// (c_parameter_out), and returns that result.
+// Returns 0 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int omp_test_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 0;
+    }
+
+    int result = 0;
+    VarDesc desc[2] = {0};
+
+    // desc[0]: the lock object
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_lock_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: slot for the value handed back to the caller
+    desc[1].ptr = &result;
+    desc[1].count = 1;
+    desc[1].size = sizeof(int);
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_test_lock_target",
+                    0, 2, desc, NULL, 0, 0, 0);
+    return result;
+}
+
+// nested lock API functions
+
+// Host-side wrapper that runs the "omp_init_nest_lock_target" entry on a
+// target. `lock` is described as a single omp_nest_lock_target_t with
+// direction c_parameter_out. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_init_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_nest_lock_target_t);
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_init_nest_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_destroy_nest_lock_target" entry on a
+// target. `lock` is described as a single omp_nest_lock_target_t with
+// direction c_parameter_in. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_destroy_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_nest_lock_target_t);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_destroy_nest_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_set_nest_lock_target" entry on a
+// target. `lock` is described as a single omp_nest_lock_target_t with
+// direction c_parameter_inout. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_set_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_nest_lock_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_set_nest_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_unset_nest_lock_target" entry on a
+// target. `lock` is described as a single omp_nest_lock_target_t with
+// direction c_parameter_inout. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void omp_unset_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_nest_lock_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_unset_nest_lock_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "omp_test_nest_lock_target" entry on a
+// target. Passes `lock` (c_parameter_inout) and a local int result
+// (c_parameter_out), and returns that result.
+// Returns 0 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int omp_test_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 0;
+    }
+
+    int result = 0;
+    VarDesc desc[2] = {0};
+
+    // desc[0]: the nested lock object
+    desc[0].ptr = lock;
+    desc[0].count = 1;
+    desc[0].size = sizeof(omp_nest_lock_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: slot for the value handed back to the caller
+    desc[1].ptr = &result;
+    desc[1].count = 1;
+    desc[1].size = sizeof(int);
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "omp_test_nest_lock_target",
+                    0, 2, desc, NULL, 0, 0, 0);
+    return result;
+}
+
+// kmp API functions
+
+// Forwards `size` to omp_set_int_target under the entry name
+// "kmp_set_stacksize_target".
+void kmp_set_stacksize_target(TARGET_TYPE target_type, int target_number,
+                              int size)
+{
+    omp_set_int_target(
+        target_type, target_number, size, "kmp_set_stacksize_target");
+}
+
+// Returns whatever omp_get_int_target yields for the entry name
+// "kmp_get_stacksize_target".
+int kmp_get_stacksize_target(TARGET_TYPE target_type, int target_number)
+{
+    return omp_get_int_target(
+        target_type, target_number, "kmp_get_stacksize_target");
+}
+
+// Forwards `size` to omp_set_int_target under the entry name
+// "kmp_set_stacksize_s_target".
+// NOTE(review): `size` is a size_t but travels through the int helper; if
+// omp_set_int_target takes an int the value is narrowed - confirm.
+void kmp_set_stacksize_s_target(TARGET_TYPE target_type, int target_number,
+                                size_t size)
+{
+    omp_set_int_target(
+        target_type, target_number, size, "kmp_set_stacksize_s_target");
+}
+
+// Returns whatever omp_get_int_target yields for the entry name
+// "kmp_get_stacksize_s_target", converted to size_t.
+size_t kmp_get_stacksize_s_target(TARGET_TYPE target_type, int target_number)
+{
+    return omp_get_int_target(
+        target_type, target_number, "kmp_get_stacksize_s_target");
+}
+
+// Forwards `time` to omp_set_int_target under the entry name
+// "kmp_set_blocktime_target".
+void kmp_set_blocktime_target(TARGET_TYPE target_type, int target_number,
+                              int time)
+{
+    omp_set_int_target(
+        target_type, target_number, time, "kmp_set_blocktime_target");
+}
+
+// Returns whatever omp_get_int_target yields for the entry name
+// "kmp_get_blocktime_target".
+int kmp_get_blocktime_target(TARGET_TYPE target_type, int target_number)
+{
+    return omp_get_int_target(
+        target_type, target_number, "kmp_get_blocktime_target");
+}
+
+// Host-side wrapper that runs the "kmp_set_library_serial_target" entry on a
+// target without passing any variable descriptors.
+// No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void kmp_set_library_serial_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    OFFLOAD_OFFLOAD(handle, "kmp_set_library_serial_target",
+                    0, 0, 0, 0, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "kmp_set_library_turnaround_target" entry
+// on a target without passing any variable descriptors.
+// No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void kmp_set_library_turnaround_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    OFFLOAD_OFFLOAD(handle, "kmp_set_library_turnaround_target",
+                    0, 0, 0, 0, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "kmp_set_library_throughput_target" entry
+// on a target without passing any variable descriptors.
+// No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void kmp_set_library_throughput_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    OFFLOAD_OFFLOAD(handle, "kmp_set_library_throughput_target",
+                    0, 0, 0, 0, 0, 0, 0);
+}
+
+// Forwards `mode` to omp_set_int_target under the entry name
+// "kmp_set_library_target".
+void kmp_set_library_target(TARGET_TYPE target_type, int target_number,
+                            int mode)
+{
+    omp_set_int_target(
+        target_type, target_number, mode, "kmp_set_library_target");
+}
+
+// Returns whatever omp_get_int_target yields for the entry name
+// "kmp_get_library_target".
+int kmp_get_library_target(TARGET_TYPE target_type, int target_number)
+{
+    return omp_get_int_target(
+        target_type, target_number, "kmp_get_library_target");
+}
+
+// Host-side wrapper that runs the "kmp_set_defaults_target" entry on a
+// target. The `defaults` string is passed as one c_string_ptr descriptor
+// (direction c_parameter_in, alloc_if and free_if both 1) whose ptr field
+// holds the address of the `defaults` pointer itself.
+// No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void kmp_set_defaults_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    char const *defaults
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = &defaults;
+    desc[0].free_if = 1;
+    desc[0].alloc_if = 1;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_string_ptr;
+    desc[0].type.src = c_string_ptr;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_set_defaults_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// affinity API functions
+
+// Host-side wrapper that runs the "kmp_create_affinity_mask_target" entry on
+// a target. `mask` is described as a single kmp_affinity_mask_target_t with
+// direction c_parameter_out. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void kmp_create_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = mask;
+    desc[0].count = 1;
+    desc[0].size = sizeof(kmp_affinity_mask_target_t);
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_create_affinity_mask_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "kmp_destroy_affinity_mask_target" entry
+// on a target. `mask` is described as a single kmp_affinity_mask_target_t
+// with direction c_parameter_in. No-op when OFFLOAD_TARGET_ACQUIRE returns 0.
+void kmp_destroy_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return;
+    }
+
+    VarDesc desc[1] = {0};
+
+    desc[0].ptr = mask;
+    desc[0].count = 1;
+    desc[0].size = sizeof(kmp_affinity_mask_target_t);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_destroy_affinity_mask_target",
+                    0, 1, desc, NULL, 0, 0, 0);
+}
+
+// Host-side wrapper that runs the "kmp_set_affinity_target" entry on a
+// target. Passes `mask` (c_parameter_in) and a local int result
+// (c_parameter_out), and returns that result.
+// Returns 1 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int kmp_set_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 1;
+    }
+
+    int result = 1;
+    VarDesc desc[2] = {0};
+
+    // desc[0]: the affinity mask
+    desc[0].ptr = mask;
+    desc[0].count = 1;
+    desc[0].size = sizeof(kmp_affinity_mask_target_t);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: slot for the value handed back to the caller
+    desc[1].ptr = &result;
+    desc[1].count = 1;
+    desc[1].size = sizeof(int);
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_set_affinity_target",
+                    0, 2, desc, NULL, 0, 0, 0);
+    return result;
+}
+
+// Host-side wrapper that runs the "kmp_get_affinity_target" entry on a
+// target. Passes `mask` (c_parameter_inout) and a local int result
+// (c_parameter_out), and returns that result.
+// Returns 1 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int kmp_get_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 1;
+    }
+
+    int result = 1;
+    VarDesc desc[2] = {0};
+
+    // desc[0]: the affinity mask
+    desc[0].ptr = mask;
+    desc[0].count = 1;
+    desc[0].size = sizeof(kmp_affinity_mask_target_t);
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: slot for the value handed back to the caller
+    desc[1].ptr = &result;
+    desc[1].count = 1;
+    desc[1].size = sizeof(int);
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_get_affinity_target",
+                    0, 2, desc, NULL, 0, 0, 0);
+    return result;
+}
+
+// Returns whatever omp_get_int_target yields for the entry name
+// "kmp_get_affinity_max_proc_target".
+int kmp_get_affinity_max_proc_target(TARGET_TYPE target_type,
+                                     int target_number)
+{
+    return omp_get_int_target(
+        target_type, target_number, "kmp_get_affinity_max_proc_target");
+}
+
+// Host-side wrapper that runs the "kmp_set_affinity_mask_proc_target" entry
+// on a target. Passes `proc` (c_parameter_in), `mask` (c_parameter_inout)
+// and a local int result (c_parameter_out), and returns that result.
+// Returns 1 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int kmp_set_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 1;
+    }
+
+    int result = 1;
+    VarDesc desc[3] = {0};
+
+    // desc[0]: processor number
+    desc[0].ptr = &proc;
+    desc[0].count = 1;
+    desc[0].size = sizeof(int);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: the affinity mask
+    desc[1].ptr = mask;
+    desc[1].count = 1;
+    desc[1].size = sizeof(kmp_affinity_mask_target_t);
+    desc[1].direction.bits = c_parameter_inout;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    // desc[2]: slot for the value handed back to the caller
+    desc[2].ptr = &result;
+    desc[2].count = 1;
+    desc[2].size = sizeof(int);
+    desc[2].direction.bits = c_parameter_out;
+    desc[2].type.dst = c_data;
+    desc[2].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_set_affinity_mask_proc_target",
+                    0, 3, desc, NULL, 0, 0, 0);
+    return result;
+}
+
+// Host-side wrapper that runs the "kmp_unset_affinity_mask_proc_target"
+// entry on a target. Passes `proc` (c_parameter_in), `mask`
+// (c_parameter_inout) and a local int result (c_parameter_out), and returns
+// that result. Returns 1 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int kmp_unset_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 1;
+    }
+
+    int result = 1;
+    VarDesc desc[3] = {0};
+
+    // desc[0]: processor number
+    desc[0].ptr = &proc;
+    desc[0].count = 1;
+    desc[0].size = sizeof(int);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: the affinity mask
+    desc[1].ptr = mask;
+    desc[1].count = 1;
+    desc[1].size = sizeof(kmp_affinity_mask_target_t);
+    desc[1].direction.bits = c_parameter_inout;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    // desc[2]: slot for the value handed back to the caller
+    desc[2].ptr = &result;
+    desc[2].count = 1;
+    desc[2].size = sizeof(int);
+    desc[2].direction.bits = c_parameter_out;
+    desc[2].type.dst = c_data;
+    desc[2].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_unset_affinity_mask_proc_target",
+                    0, 3, desc, NULL, 0, 0, 0);
+    return result;
+}
+
+// Host-side wrapper that runs the "kmp_get_affinity_mask_proc_target" entry
+// on a target. Passes `proc` and `mask` (both c_parameter_in) and a local
+// int result (c_parameter_out), and returns that result.
+// Returns 1 when OFFLOAD_TARGET_ACQUIRE returns 0.
+int kmp_get_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    OFFLOAD handle = OFFLOAD_TARGET_ACQUIRE(target_type, target_number,
+                                            0, NULL, __func__, 0);
+    if (handle == 0) {
+        return 1;
+    }
+
+    int result = 1;
+    VarDesc desc[3] = {0};
+
+    // desc[0]: processor number
+    desc[0].ptr = &proc;
+    desc[0].count = 1;
+    desc[0].size = sizeof(int);
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: the affinity mask (input only here)
+    desc[1].ptr = mask;
+    desc[1].count = 1;
+    desc[1].size = sizeof(kmp_affinity_mask_target_t);
+    desc[1].direction.bits = c_parameter_in;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    // desc[2]: slot for the value handed back to the caller
+    desc[2].ptr = &result;
+    desc[2].count = 1;
+    desc[2].size = sizeof(int);
+    desc[2].direction.bits = c_parameter_out;
+    desc[2].type.dst = c_data;
+    desc[2].type.src = c_data;
+
+    OFFLOAD_OFFLOAD(handle, "kmp_get_affinity_mask_proc_target",
+                    0, 3, desc, NULL, 0, 0, 0);
+    return result;
+}

Added: openmp/trunk/offload/src/offload_omp_target.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_omp_target.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_omp_target.cpp (added)
+++ openmp/trunk/offload/src/offload_omp_target.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,1021 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <omp.h>
+#include "offload.h"
+#include "compiler_if_target.h"
+
+// OpenMP API
+
+// Target-side omp_set_default_device: the body is empty, so the requested
+// device number is ignored.
+void omp_set_default_device(int num)
+{
+    (void) num;  // deliberately unused
+}
+
+// Target-side omp_get_default_device: reports the value of mic_index.
+int omp_get_default_device(void)
+{
+    const int device = mic_index;
+    return device;
+}
+
+// Target-side omp_get_num_devices: reports the value of mic_engines_total.
+int omp_get_num_devices()
+{
+    const int device_count = mic_engines_total;
+    return device_count;
+}
+
+// OpenMP API wrappers
+
+// Helper for the *_lrb getters: describes `setting` as a single c_data
+// variable with direction c_parameter_out and runs the
+// OFFLOAD_TARGET_ENTER / OFFLOAD_TARGET_LEAVE pair on the descriptor passed
+// in as `ofld_`.
+static void omp_send_int_to_host(
+    void *ofld_,
+    int setting
+)
+{
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &setting;
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Helper for the *_lrb setters: describes a local int as a single c_data
+// variable with direction c_parameter_in, runs the OFFLOAD_TARGET_ENTER /
+// OFFLOAD_TARGET_LEAVE pair on the descriptor passed in as `ofld_`, and
+// returns the local. The local is not initialised here; its value comes
+// from whatever the offload calls store through the descriptor.
+static int omp_get_int_from_host(
+    void *ofld_
+)
+{
+    int received;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &received;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    OFFLOAD_TARGET_LEAVE(handle);
+
+    return received;
+}
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to omp_set_num_threads.
+void omp_set_num_threads_lrb(void *ofld)
+{
+    const int requested = omp_get_int_from_host(ofld);
+
+    omp_set_num_threads(requested);
+}
+
+// Target-side handler: sends the result of omp_get_max_threads back via
+// omp_send_int_to_host.
+void omp_get_max_threads_lrb(void *ofld)
+{
+    const int max_threads = omp_get_max_threads();
+
+    omp_send_int_to_host(ofld, max_threads);
+}
+
+// Target-side handler: sends the result of omp_get_num_procs back via
+// omp_send_int_to_host.
+void omp_get_num_procs_lrb(void *ofld)
+{
+    const int proc_count = omp_get_num_procs();
+
+    omp_send_int_to_host(ofld, proc_count);
+}
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to omp_set_dynamic.
+void omp_set_dynamic_lrb(void *ofld)
+{
+    const int flag = omp_get_int_from_host(ofld);
+
+    omp_set_dynamic(flag);
+}
+
+// Target-side handler: sends the result of omp_get_dynamic back via
+// omp_send_int_to_host.
+void omp_get_dynamic_lrb(void *ofld)
+{
+    const int flag = omp_get_dynamic();
+
+    omp_send_int_to_host(ofld, flag);
+}
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to omp_set_nested.
+void omp_set_nested_lrb(void *ofld)
+{
+    const int flag = omp_get_int_from_host(ofld);
+
+    omp_set_nested(flag);
+}
+
+// Target-side handler: sends the result of omp_get_nested back via
+// omp_send_int_to_host.
+void omp_get_nested_lrb(void *ofld)
+{
+    const int flag = omp_get_nested();
+
+    omp_send_int_to_host(ofld, flag);
+}
+
+// Target-side handler: describes a local omp_sched_t and a local int as
+// c_parameter_in variables, then calls omp_set_schedule with them between
+// OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_set_schedule_lrb(
+    void *ofld_
+)
+{
+    int modifier;
+    omp_sched_t kind;
+    VarDesc desc[2] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: schedule kind
+    desc[0].ptr = &kind;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: schedule modifier
+    desc[1].ptr = &modifier;
+    desc[1].direction.bits = c_parameter_in;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, desc, NULL);
+    omp_set_schedule(kind, modifier);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_sched_t and a local int as
+// c_parameter_out variables, and fills them with omp_get_schedule between
+// OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_get_schedule_lrb(
+    void *ofld_
+)
+{
+    int modifier;
+    omp_sched_t kind;
+    VarDesc desc[2] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: schedule kind
+    desc[0].ptr = &kind;
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: schedule modifier
+    desc[1].ptr = &modifier;
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, desc, NULL);
+    omp_get_schedule(&kind, &modifier);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// lock API functions
+
+// Target-side handler: describes a local omp_lock_target_t as a
+// c_parameter_out variable and calls omp_init_lock on its `lock` member
+// between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_init_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_init_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_lock_target_t as a
+// c_parameter_in variable and calls omp_destroy_lock on its `lock` member
+// between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_destroy_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_destroy_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_lock_target_t as a
+// c_parameter_inout variable and calls omp_set_lock on its `lock` member
+// between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_set_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_set_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_lock_target_t as a
+// c_parameter_inout variable and calls omp_unset_lock on its `lock` member
+// between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_unset_lock_lrb(
+    void *ofld_
+)
+{
+    omp_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_unset_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_lock_target_t
+// (c_parameter_inout) and a local int (c_parameter_out), and stores the
+// return value of omp_test_lock in that int between OFFLOAD_TARGET_ENTER
+// and OFFLOAD_TARGET_LEAVE.
+void omp_test_lock_lrb(
+    void *ofld_
+)
+{
+    int outcome;
+    omp_lock_target_t target_lock;
+    VarDesc desc[2] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: the lock object
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: value produced by omp_test_lock
+    desc[1].ptr = &outcome;
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, desc, NULL);
+    outcome = omp_test_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// nested lock API functions
+
+// Target-side handler: describes a local omp_nest_lock_target_t as a
+// c_parameter_out variable and calls omp_init_nest_lock on its `lock`
+// member between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_init_nest_lock_lrb(
+    void *ofld_
+)
+{
+    omp_nest_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_init_nest_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_nest_lock_target_t as a
+// c_parameter_in variable and calls omp_destroy_nest_lock on its `lock`
+// member between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_destroy_nest_lock_lrb(
+    void *ofld_
+)
+{
+    omp_nest_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_destroy_nest_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_nest_lock_target_t as a
+// c_parameter_inout variable and calls omp_set_nest_lock on its `lock`
+// member between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_set_nest_lock_lrb(
+    void *ofld_
+)
+{
+    omp_nest_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_set_nest_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_nest_lock_target_t as a
+// c_parameter_inout variable and calls omp_unset_nest_lock on its `lock`
+// member between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_unset_nest_lock_lrb(
+    void *ofld_
+)
+{
+    omp_nest_lock_target_t target_lock;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    omp_unset_nest_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local omp_nest_lock_target_t
+// (c_parameter_inout) and a local int (c_parameter_out), and stores the
+// return value of omp_test_nest_lock in that int between
+// OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void omp_test_nest_lock_lrb(
+    void *ofld_
+)
+{
+    int outcome;
+    omp_nest_lock_target_t target_lock;
+    VarDesc desc[2] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: the nested lock object
+    desc[0].ptr = &target_lock;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: value produced by omp_test_nest_lock
+    desc[1].ptr = &outcome;
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, desc, NULL);
+    outcome = omp_test_nest_lock(&target_lock.lock);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// kmp API functions
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to kmp_set_stacksize.
+void kmp_set_stacksize_lrb(void *ofld)
+{
+    const int requested = omp_get_int_from_host(ofld);
+
+    kmp_set_stacksize(requested);
+}
+
+// Target-side handler: sends the result of kmp_get_stacksize back via
+// omp_send_int_to_host.
+void kmp_get_stacksize_lrb(void *ofld)
+{
+    const int current = kmp_get_stacksize();
+
+    omp_send_int_to_host(ofld, current);
+}
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to kmp_set_stacksize_s. The value is held in an int on this side.
+void kmp_set_stacksize_s_lrb(void *ofld)
+{
+    const int requested = omp_get_int_from_host(ofld);
+
+    kmp_set_stacksize_s(requested);
+}
+
+// Target-side handler: stores the result of kmp_get_stacksize_s in an int
+// and sends it back via omp_send_int_to_host.
+// NOTE(review): if kmp_get_stacksize_s returns a type wider than int the
+// value is narrowed by this assignment - confirm.
+void kmp_get_stacksize_s_lrb(void *ofld)
+{
+    const int current = kmp_get_stacksize_s();
+
+    omp_send_int_to_host(ofld, current);
+}
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to kmp_set_blocktime.
+void kmp_set_blocktime_lrb(void *ofld)
+{
+    const int requested = omp_get_int_from_host(ofld);
+
+    kmp_set_blocktime(requested);
+}
+
+// Target-side handler: sends the result of kmp_get_blocktime back via
+// omp_send_int_to_host.
+void kmp_get_blocktime_lrb(void *ofld)
+{
+    const int current = kmp_get_blocktime();
+
+    omp_send_int_to_host(ofld, current);
+}
+
+// Target-side handler: calls kmp_set_library_serial between
+// OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE; no variables are described.
+void kmp_set_library_serial_lrb(void *ofld_)
+{
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    OFFLOAD_TARGET_ENTER(handle, 0, 0, 0);
+    kmp_set_library_serial();
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: calls kmp_set_library_turnaround between
+// OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE; no variables are described.
+void kmp_set_library_turnaround_lrb(void *ofld_)
+{
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    OFFLOAD_TARGET_ENTER(handle, 0, 0, 0);
+    kmp_set_library_turnaround();
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: calls kmp_set_library_throughput between
+// OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE; no variables are described.
+void kmp_set_library_throughput_lrb(void *ofld_)
+{
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    OFFLOAD_TARGET_ENTER(handle, 0, 0, 0);
+    kmp_set_library_throughput();
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: reads one int via omp_get_int_from_host and passes
+// it to kmp_set_library.
+void kmp_set_library_lrb(void *ofld)
+{
+    const int requested = omp_get_int_from_host(ofld);
+
+    kmp_set_library(requested);
+}
+
+// Target-side handler: sends the result of kmp_get_library back via
+// omp_send_int_to_host.
+void kmp_get_library_lrb(void *ofld)
+{
+    const int current = kmp_get_library();
+
+    omp_send_int_to_host(ofld, current);
+}
+
+// Target-side handler: describes a local char pointer (initially null) as a
+// c_string_ptr variable with direction c_parameter_in and passes it to
+// kmp_set_defaults between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void kmp_set_defaults_lrb(
+    void *ofld_
+)
+{
+    char *text = 0;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &text;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_string_ptr;
+    desc[0].type.src = c_string_ptr;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    kmp_set_defaults(text);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// affinity API functions
+
+// Target-side handler: describes a local kmp_affinity_mask_target_t as a
+// c_parameter_out variable and calls kmp_create_affinity_mask on its `mask`
+// member between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void kmp_create_affinity_mask_lrb(
+    void *ofld_
+)
+{
+    kmp_affinity_mask_target_t target_mask;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_mask;
+    desc[0].direction.bits = c_parameter_out;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    kmp_create_affinity_mask(&target_mask.mask);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local kmp_affinity_mask_target_t as a
+// c_parameter_in variable and calls kmp_destroy_affinity_mask on its `mask`
+// member between OFFLOAD_TARGET_ENTER and OFFLOAD_TARGET_LEAVE.
+void kmp_destroy_affinity_mask_lrb(
+    void *ofld_
+)
+{
+    kmp_affinity_mask_target_t target_mask;
+    VarDesc desc[1] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    desc[0].ptr = &target_mask;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 1, desc, NULL);
+    kmp_destroy_affinity_mask(&target_mask.mask);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local kmp_affinity_mask_target_t
+// (c_parameter_in) and a local int (c_parameter_out), and stores the return
+// value of kmp_set_affinity in that int between OFFLOAD_TARGET_ENTER and
+// OFFLOAD_TARGET_LEAVE.
+void kmp_set_affinity_lrb(
+    void *ofld_
+)
+{
+    int outcome;
+    kmp_affinity_mask_target_t target_mask;
+    VarDesc desc[2] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: the affinity mask
+    desc[0].ptr = &target_mask;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: value produced by kmp_set_affinity
+    desc[1].ptr = &outcome;
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, desc, NULL);
+    outcome = kmp_set_affinity(&target_mask.mask);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local kmp_affinity_mask_target_t
+// (c_parameter_inout) and a local int (c_parameter_out), and stores the
+// return value of kmp_get_affinity in that int between OFFLOAD_TARGET_ENTER
+// and OFFLOAD_TARGET_LEAVE.
+void kmp_get_affinity_lrb(
+    void *ofld_
+)
+{
+    int outcome;
+    kmp_affinity_mask_target_t target_mask;
+    VarDesc desc[2] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: the affinity mask
+    desc[0].ptr = &target_mask;
+    desc[0].direction.bits = c_parameter_inout;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: value produced by kmp_get_affinity
+    desc[1].ptr = &outcome;
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 2, desc, NULL);
+    outcome = kmp_get_affinity(&target_mask.mask);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: sends the result of kmp_get_affinity_max_proc back
+// via omp_send_int_to_host.
+void kmp_get_affinity_max_proc_lrb(void *ofld)
+{
+    const int limit = kmp_get_affinity_max_proc();
+
+    omp_send_int_to_host(ofld, limit);
+}
+
+// Target-side handler: describes a local int `proc` (c_parameter_in), a
+// local kmp_affinity_mask_target_t (c_parameter_inout) and a local int
+// result (c_parameter_out), and stores the return value of
+// kmp_set_affinity_mask_proc in the result between OFFLOAD_TARGET_ENTER and
+// OFFLOAD_TARGET_LEAVE.
+void kmp_set_affinity_mask_proc_lrb(
+    void *ofld_
+)
+{
+    int proc;
+    int outcome;
+    kmp_affinity_mask_target_t target_mask;
+    VarDesc desc[3] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: processor number
+    desc[0].ptr = &proc;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: the affinity mask
+    desc[1].ptr = &target_mask;
+    desc[1].direction.bits = c_parameter_inout;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    // desc[2]: value produced by kmp_set_affinity_mask_proc
+    desc[2].ptr = &outcome;
+    desc[2].direction.bits = c_parameter_out;
+    desc[2].type.dst = c_data;
+    desc[2].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 3, desc, NULL);
+    outcome = kmp_set_affinity_mask_proc(proc, &target_mask.mask);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+// Target-side handler: describes a local int `proc` (c_parameter_in), a
+// local kmp_affinity_mask_target_t (c_parameter_inout) and a local int
+// result (c_parameter_out), and stores the return value of
+// kmp_unset_affinity_mask_proc in the result between OFFLOAD_TARGET_ENTER
+// and OFFLOAD_TARGET_LEAVE.
+void kmp_unset_affinity_mask_proc_lrb(
+    void *ofld_
+)
+{
+    int proc;
+    int outcome;
+    kmp_affinity_mask_target_t target_mask;
+    VarDesc desc[3] = {0};
+    OFFLOAD handle = (OFFLOAD) ofld_;
+
+    // desc[0]: processor number
+    desc[0].ptr = &proc;
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].type.dst = c_data;
+    desc[0].type.src = c_data;
+
+    // desc[1]: the affinity mask
+    desc[1].ptr = &target_mask;
+    desc[1].direction.bits = c_parameter_inout;
+    desc[1].type.dst = c_data;
+    desc[1].type.src = c_data;
+
+    // desc[2]: value produced by kmp_unset_affinity_mask_proc
+    desc[2].ptr = &outcome;
+    desc[2].direction.bits = c_parameter_out;
+    desc[2].type.dst = c_data;
+    desc[2].type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(handle, 3, desc, NULL);
+    outcome = kmp_unset_affinity_mask_proc(proc, &target_mask.mask);
+    OFFLOAD_TARGET_LEAVE(handle);
+}
+
+void kmp_get_affinity_mask_proc_lrb(   // Offload entry point wrapping kmp_get_affinity_mask_proc().
+    void *ofld_                        // opaque handle; cast to OFFLOAD below
+)
+{
+    OFFLOAD ofld = (OFFLOAD) ofld_;
+    VarDesc vars[3] = {0};             // one descriptor per transferred variable
+    kmp_affinity_mask_target_t mask;
+    int proc, result;
+    // vars[0]: processor number, declared as an "in" parameter
+    vars[0].type.src = c_data;
+    vars[0].type.dst = c_data;
+    vars[0].direction.bits = c_parameter_in;
+    vars[0].ptr = &proc;
+    // vars[1]: the affinity mask; unlike the set/unset entries it is "in" only
+    vars[1].type.src = c_data;
+    vars[1].type.dst = c_data;
+    vars[1].direction.bits = c_parameter_in;
+    vars[1].ptr = &mask;
+    // vars[2]: return value of the call, declared as an "out" parameter
+    vars[2].type.src = c_data;
+    vars[2].type.dst = c_data;
+    vars[2].direction.bits = c_parameter_out;
+    vars[2].ptr = &result;
+    // NOTE(review): ENTER/LEAVE presumably receive the in-data and send the out-data; confirm against the macro definitions
+    OFFLOAD_TARGET_ENTER(ofld, 3, vars, NULL);
+    result = kmp_get_affinity_mask_proc(proc, &mask.mask);
+    OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side stubs for the host functions (to avoid unresolveds)
+// These are needed for the offloadm table
+
+void omp_set_num_threads_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+int omp_get_max_threads_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+int omp_get_num_procs_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void omp_set_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads   // NOTE(review): name matches omp_set_num_threads_target; presumably the dynamic flag - confirm against the host declaration
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+int omp_get_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void omp_set_nested_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads   // NOTE(review): name matches omp_set_num_threads_target; presumably the nested flag - confirm against the host declaration
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+int omp_get_nested_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void omp_set_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t kind,
+    int modifier
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+void omp_get_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t *kind,
+    int *modifier
+)
+{   // Target-side stub: intentionally empty; *kind and *modifier are never written.
+}
+
+void omp_init_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+void omp_destroy_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+void omp_set_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+void omp_unset_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+int omp_test_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+)
+{
+    return 0;   // Target-side stub: always returns 0; the lock is not touched.
+}
+
+void omp_init_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+void omp_destroy_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+void omp_set_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+void omp_unset_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{   // Target-side stub: intentionally empty; the lock is not touched.
+}
+
+int omp_test_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+)
+{
+    return 0;   // Target-side stub: always returns 0; the lock is not touched.
+}
+
+void kmp_set_stacksize_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int size
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+int kmp_get_stacksize_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void kmp_set_stacksize_s_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    size_t size
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+size_t kmp_get_stacksize_s_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void kmp_set_blocktime_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int time
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+int kmp_get_blocktime_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void kmp_set_library_serial_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+void kmp_set_library_turnaround_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+void kmp_set_library_throughput_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+void kmp_set_library_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int mode
+)
+{   // Target-side stub: intentionally empty, all arguments are ignored.
+}
+
+int kmp_get_library_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+void kmp_set_defaults_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    char const *defaults
+)
+{   // Target-side stub: intentionally empty; the defaults string is not read.
+}
+
+void kmp_create_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{   // Target-side stub: intentionally empty; the mask is not touched.
+}
+
+void kmp_destroy_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{   // Target-side stub: intentionally empty; the mask is not touched.
+}
+
+int kmp_set_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    return 0;   // Target-side stub: always returns 0; the mask is not touched.
+}
+
+int kmp_get_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    return 0;   // Target-side stub: always returns 0; the mask is not written.
+}
+
+int kmp_get_affinity_max_proc_target(
+    TARGET_TYPE target_type,
+    int target_number
+)
+{
+    return 0;   // Target-side stub: always returns 0, arguments are ignored.
+}
+
+int kmp_set_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    return 0;   // Target-side stub: always returns 0; the mask is not touched.
+}
+
+int kmp_unset_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    return 0;   // Target-side stub: always returns 0; the mask is not touched.
+}
+
+int kmp_get_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+)
+{
+    return 0;   // Target-side stub: always returns 0; the mask is not read.
+}

Added: openmp/trunk/offload/src/offload_orsl.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_orsl.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_orsl.cpp (added)
+++ openmp/trunk/offload/src/offload_orsl.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,84 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_orsl.h"
+#include <stdlib.h>
+#include "offload_host.h"
+#include "orsl-lite/include/orsl-lite.h"
+
+namespace ORSL {
+
+// Set by init(); while false, reserve()/try_reserve()/release() make no
+// ORSL calls at all.
+static bool            is_enabled = false;
+// Tag handed to every ORSL call made from this file.
+static const ORSLTag   my_tag = "Offload";
+
+// Reads OFFLOAD_ENABLE_ORSL. A value that parses as an integer enables ORSL
+// when it is non-zero; an unparsable value is reported and leaves the
+// setting unchanged. An unset or empty variable is ignored.
+void init()
+{
+    const char *setting = getenv("OFFLOAD_ENABLE_ORSL");
+
+    if (setting != 0 && *setting != '\0') {
+        int64_t parsed;
+        if (!__offload_parse_int_string(setting, parsed)) {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             "OFFLOAD_ENABLE_ORSL");
+        }
+        else {
+            is_enabled = (parsed != 0);
+        }
+    }
+
+    if (!is_enabled) {
+        OFFLOAD_DEBUG_TRACE(2, "ORSL is disabled\n");
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE(2, "ORSL is enabled\n");
+    }
+}
+
+// Reserves the given device through ORSLReserve().
+// Returns true when ORSL is disabled or the call reports success (0).
+bool reserve(int device)
+{
+    if (!is_enabled) {
+        return true;
+    }
+
+    int physical = mic_engines[device].get_physical_index();
+    ORSLBusySet busy;
+    busy.type = BUSY_SET_FULL;
+
+    return ORSLReserve(1, &physical, &busy, my_tag) == 0;
+}
+
+// Same contract as reserve(), but goes through ORSLTryReserve().
+bool try_reserve(int device)
+{
+    if (!is_enabled) {
+        return true;
+    }
+
+    int physical = mic_engines[device].get_physical_index();
+    ORSLBusySet busy;
+    busy.type = BUSY_SET_FULL;
+
+    return ORSLTryReserve(1, &physical, &busy, my_tag) == 0;
+}
+
+// Releases the given device through ORSLRelease(); does nothing when ORSL
+// is disabled.
+void release(int device)
+{
+    if (!is_enabled) {
+        return;
+    }
+
+    int physical = mic_engines[device].get_physical_index();
+    ORSLBusySet busy;
+    busy.type = BUSY_SET_FULL;
+
+    // A failure here should never happen; the result is deliberately unused.
+    (void) ORSLRelease(1, &physical, &busy, my_tag);
+}
+
+} // namespace ORSL

Added: openmp/trunk/offload/src/offload_orsl.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_orsl.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_orsl.h (added)
+++ openmp/trunk/offload/src/offload_orsl.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_ORSL_H_INCLUDED
+#define OFFLOAD_ORSL_H_INCLUDED
+
+// ORSL interface
+namespace ORSL {
+
+extern void init();                   // reads OFFLOAD_ENABLE_ORSL to decide whether ORSL is used
+
+extern bool reserve(int device);      // false only if ORSLReserve() fails; true when ORSL is disabled
+extern bool try_reserve(int device);  // same contract, using ORSLTryReserve()
+extern void release(int device);      // calls ORSLRelease() when ORSL is enabled; its result is ignored
+
+} // namespace ORSL
+
+#endif // OFFLOAD_ORSL_H_INCLUDED

Added: openmp/trunk/offload/src/offload_table.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_table.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_table.cpp (added)
+++ openmp/trunk/offload/src/offload_table.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,375 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_table.h"
+#include "offload_common.h"
+
+#if !HOST_LIBRARY
+// Predefined offload entries
+extern void omp_set_num_threads_lrb(void*);
+extern void omp_get_max_threads_lrb(void*);
+extern void omp_get_num_procs_lrb(void*);
+extern void omp_set_dynamic_lrb(void*);
+extern void omp_get_dynamic_lrb(void*);
+extern void omp_set_nested_lrb(void*);
+extern void omp_get_nested_lrb(void*);
+extern void omp_set_schedule_lrb(void*);
+extern void omp_get_schedule_lrb(void*);
+
+extern void omp_init_lock_lrb(void*);
+extern void omp_destroy_lock_lrb(void*);
+extern void omp_set_lock_lrb(void*);
+extern void omp_unset_lock_lrb(void*);
+extern void omp_test_lock_lrb(void*);
+
+extern void omp_init_nest_lock_lrb(void*);
+extern void omp_destroy_nest_lock_lrb(void*);
+extern void omp_set_nest_lock_lrb(void*);
+extern void omp_unset_nest_lock_lrb(void*);
+extern void omp_test_nest_lock_lrb(void*);
+
+extern void kmp_set_stacksize_lrb(void*);
+extern void kmp_get_stacksize_lrb(void*);
+extern void kmp_set_stacksize_s_lrb(void*);
+extern void kmp_get_stacksize_s_lrb(void*);
+extern void kmp_set_blocktime_lrb(void*);
+extern void kmp_get_blocktime_lrb(void*);
+extern void kmp_set_library_serial_lrb(void*);
+extern void kmp_set_library_turnaround_lrb(void*);
+extern void kmp_set_library_throughput_lrb(void*);
+extern void kmp_set_library_lrb(void*);
+extern void kmp_get_library_lrb(void*);
+extern void kmp_set_defaults_lrb(void*);
+
+extern void kmp_create_affinity_mask_lrb(void*);
+extern void kmp_destroy_affinity_mask_lrb(void*);
+extern void kmp_set_affinity_lrb(void*);
+extern void kmp_get_affinity_lrb(void*);
+extern void kmp_get_affinity_max_proc_lrb(void*);
+extern void kmp_set_affinity_mask_proc_lrb(void*);
+extern void kmp_unset_affinity_mask_proc_lrb(void*);
+extern void kmp_get_affinity_mask_proc_lrb(void*);
+
+// Predefined entries on the target side.
+// Each element pairs the name of a "*_target" function with the address of
+// the "*_lrb" function registered under that name. The list ends with an
+// element whose two fields are both -1.
+static FuncTable::Entry predefined_entries[] = {
+    { "omp_set_num_threads_target",
+      (void*) &omp_set_num_threads_lrb },
+    { "omp_get_max_threads_target",
+      (void*) &omp_get_max_threads_lrb },
+    { "omp_get_num_procs_target",
+      (void*) &omp_get_num_procs_lrb },
+    { "omp_set_dynamic_target",
+      (void*) &omp_set_dynamic_lrb },
+    { "omp_get_dynamic_target",
+      (void*) &omp_get_dynamic_lrb },
+    { "omp_set_nested_target",
+      (void*) &omp_set_nested_lrb },
+    { "omp_get_nested_target",
+      (void*) &omp_get_nested_lrb },
+    { "omp_set_schedule_target",
+      (void*) &omp_set_schedule_lrb },
+    { "omp_get_schedule_target",
+      (void*) &omp_get_schedule_lrb },
+
+    { "omp_init_lock_target",
+      (void*) &omp_init_lock_lrb },
+    { "omp_destroy_lock_target",
+      (void*) &omp_destroy_lock_lrb },
+    { "omp_set_lock_target",
+      (void*) &omp_set_lock_lrb },
+    { "omp_unset_lock_target",
+      (void*) &omp_unset_lock_lrb },
+    { "omp_test_lock_target",
+      (void*) &omp_test_lock_lrb },
+
+    { "omp_init_nest_lock_target",
+      (void*) &omp_init_nest_lock_lrb },
+    { "omp_destroy_nest_lock_target",
+      (void*) &omp_destroy_nest_lock_lrb },
+    { "omp_set_nest_lock_target",
+      (void*) &omp_set_nest_lock_lrb },
+    { "omp_unset_nest_lock_target",
+      (void*) &omp_unset_nest_lock_lrb },
+    { "omp_test_nest_lock_target",
+      (void*) &omp_test_nest_lock_lrb },
+
+    { "kmp_set_stacksize_target",
+      (void*) &kmp_set_stacksize_lrb },
+    { "kmp_get_stacksize_target",
+      (void*) &kmp_get_stacksize_lrb },
+    { "kmp_set_stacksize_s_target",
+      (void*) &kmp_set_stacksize_s_lrb },
+    { "kmp_get_stacksize_s_target",
+      (void*) &kmp_get_stacksize_s_lrb },
+    { "kmp_set_blocktime_target",
+      (void*) &kmp_set_blocktime_lrb },
+    { "kmp_get_blocktime_target",
+      (void*) &kmp_get_blocktime_lrb },
+    { "kmp_set_library_serial_target",
+      (void*) &kmp_set_library_serial_lrb },
+    { "kmp_set_library_turnaround_target",
+      (void*) &kmp_set_library_turnaround_lrb },
+    { "kmp_set_library_throughput_target",
+      (void*) &kmp_set_library_throughput_lrb },
+    { "kmp_set_library_target",
+      (void*) &kmp_set_library_lrb },
+    { "kmp_get_library_target",
+      (void*) &kmp_get_library_lrb },
+    { "kmp_set_defaults_target",
+      (void*) &kmp_set_defaults_lrb },
+
+    { "kmp_create_affinity_mask_target",
+      (void*) &kmp_create_affinity_mask_lrb },
+    { "kmp_destroy_affinity_mask_target",
+      (void*) &kmp_destroy_affinity_mask_lrb },
+    { "kmp_set_affinity_target",
+      (void*) &kmp_set_affinity_lrb },
+    { "kmp_get_affinity_target",
+      (void*) &kmp_get_affinity_lrb },
+    { "kmp_get_affinity_max_proc_target",
+      (void*) &kmp_get_affinity_max_proc_lrb },
+    { "kmp_set_affinity_mask_proc_target",
+      (void*) &kmp_set_affinity_mask_proc_lrb },
+    { "kmp_unset_affinity_mask_proc_target",
+      (void*) &kmp_unset_affinity_mask_proc_lrb },
+    { "kmp_get_affinity_mask_proc_target",
+      (void*) &kmp_get_affinity_mask_proc_lrb },
+
+    // end-of-table marker
+    { (const char*) -1,
+      (void*) -1 }
+};
+
+static FuncList::Node predefined_table = {   // list node that carries the predefined entries
+    { predefined_entries, -1 },   // table: entries, max_name_len (-1: not computed yet)
+    0, 0                          // prev, next: not linked to any other node
+};
+
+// Entry table
+FuncList __offload_entries(&predefined_table);
+#else
+FuncList __offload_entries;
+#endif // !HOST_LIBRARY
+
+// Function table. No predefined entries.
+FuncList __offload_funcs;
+
+// Var table
+VarList  __offload_vars;
+
+// Given the function name returns the associated function pointer.
+// Tables are searched from the head of the list and the search stops at the
+// first entry whose name matches. Returns 0 when no table has the name.
+const void* FuncList::find_addr(const char *name)
+{
+    const void* func = 0;
+
+    m_lock.lock();
+
+    // "func == 0" ends the walk over the list once the inner loop has found
+    // a match; the inner "break" alone only leaves the current table.
+    for (Node *n = m_head; n != 0 && func == 0; n = n->next) {
+        for (const Table::Entry *e = n->table.entries;
+             e->name != (const char*) -1; e++) {
+            // entries with a null name are skipped
+            if (e->name != 0 && strcmp(e->name, name) == 0) {
+                func = e->func;
+                break;
+            }
+        }
+    }
+
+    m_lock.unlock();
+
+    return func;
+}
+
+// Given the function pointer returns the associated function name.
+// Tables are searched from the head of the list and the search stops at the
+// first table that yields a name. Returns 0 when no name is found.
+const char* FuncList::find_name(const void *func)
+{
+    const char* name = 0;
+
+    m_lock.lock();
+
+    // "name == 0" ends the walk over the list once the inner loop has found
+    // a name; the inner "break" alone only leaves the current table.
+    for (Node *n = m_head; n != 0 && name == 0; n = n->next) {
+        for (const Table::Entry *e = n->table.entries;
+             e->name != (const char*) -1; e++) {
+            if (e->func == func) {
+                name = e->name;
+                break;
+            }
+        }
+    }
+
+    m_lock.unlock();
+
+    return name;
+}
+
+// Returns the max name length (terminator included) over all tables.
+// The value is cached in m_max_name_len and each table caches its own
+// maximum; a negative cached value means "not computed yet".
+int64_t FuncList::max_name_length(void)
+{
+    // cached value is still valid
+    if (m_max_name_len >= 0) {
+        return m_max_name_len;
+    }
+
+    m_lock.lock();
+
+    m_max_name_len = 0;
+    for (Node *node = m_head; node != 0; node = node->next) {
+        Table &tbl = node->table;
+
+        // compute the per-table maximum only once
+        if (tbl.max_name_len < 0) {
+            tbl.max_name_len = 0;
+
+            for (const Table::Entry *ent = tbl.entries;
+                 ent->name != (const char*) -1; ++ent) {
+                if (ent->name == 0) {
+                    continue;
+                }
+                size_t len = strlen(ent->name) + 1;
+                if (tbl.max_name_len < len) {
+                    tbl.max_name_len = len;
+                }
+            }
+        }
+
+        // fold the table's maximum into the list-wide maximum
+        if (tbl.max_name_len > m_max_name_len) {
+            m_max_name_len = tbl.max_name_len;
+        }
+    }
+
+    m_lock.unlock();
+
+    return m_max_name_len;
+}
+
+// Debugging dump: traces the address and name of every named entry of
+// every table in the list.
+void FuncList::dump(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Function table:\n");
+
+    m_lock.lock();
+
+    Node *node = m_head;
+    while (node != 0) {
+        const Table::Entry *ent = node->table.entries;
+        for (; ent->name != (const char*) -1; ++ent) {
+            if (ent->name == 0) {
+                // entries without a name are not printed
+                continue;
+            }
+            OFFLOAD_DEBUG_TRACE(2, "%p %s\n", ent->func, ent->name);
+        }
+        node = node->next;
+    }
+
+    m_lock.unlock();
+}
+
+// Debugging dump: traces every named entry of every table in the list.
+// The host library also prints the entry's size; the target build has no
+// size field in its entries.
+void VarList::dump(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Var table:\n");
+
+    m_lock.lock();
+
+    for (Node *n = m_head; n != 0; n = n->next) {
+        for (const Table::Entry *e = n->table.entries;
+             e->name != (const char*) -1; e++) {
+            if (e->name != 0) {
+#if HOST_LIBRARY
+                // size is a uint64_t: print it as unsigned long long so the
+                // conversion matches the argument even where long is 32-bit
+                OFFLOAD_DEBUG_TRACE(2, "%s %p %llu\n", e->name, e->addr,
+                                    static_cast<unsigned long long>(e->size));
+#else  // HOST_LIBRARY
+                OFFLOAD_DEBUG_TRACE(2, "%s %p\n", e->name, e->addr);
+#endif // HOST_LIBRARY
+            }
+        }
+    }
+
+    m_lock.unlock();
+}
+
+// Counts the named entries of all tables into nelems and returns the number
+// of bytes needed for nelems BufEntry records plus all names with their
+// terminators.
+int64_t VarList::table_size(int64_t &nelems)
+{
+    int64_t names_bytes = 0;
+
+    nelems = 0;
+
+    for (Node *node = m_head; node != 0; node = node->next) {
+        const Table::Entry *ent = node->table.entries;
+        for (; ent->name != (const char*) -1; ++ent) {
+            if (ent->name == 0) {
+                continue;
+            }
+            names_bytes += strlen(ent->name) + 1;
+            ++nelems;
+        }
+    }
+
+    return nelems * sizeof(BufEntry) + names_bytes;
+}
+
+// Copies the table to the given buffer: nelems BufEntry records come first
+// and the string table holding the names follows them.
+void VarList::table_copy(void *buf, int64_t nelems)
+{
+    char* const base    = static_cast<char*>(buf);
+    BufEntry*   record  = static_cast<BufEntry*>(buf);
+    char*       strings = reinterpret_cast<char*>(record + nelems);
+
+    for (Node *node = m_head; node != 0; node = node->next) {
+        const Table::Entry *ent = node->table.entries;
+        for (; ent->name != (const char*) -1; ++ent) {
+            if (ent->name == 0) {
+                continue;
+            }
+
+            // the name field holds the offset of the name from the
+            // beginning of the buffer
+            record->name = strings - base;
+            record->addr = reinterpret_cast<intptr_t>(ent->addr);
+            ++record;
+
+            // append the name, terminator included, to the string table
+            const char *src = ent->name;
+            do {
+                *strings++ = *src;
+            } while (*src++ != '\0');
+        }
+    }
+}
+
+// Patches the name offsets in a buffer: the address of the buffer is added
+// to the name field of each of the first nelems records.
+void VarList::table_patch_names(void *buf, int64_t nelems)
+{
+    const intptr_t base = reinterpret_cast<intptr_t>(buf);
+    BufEntry *record = static_cast<BufEntry*>(buf);
+
+    for (int64_t left = nelems; left > 0; --left, ++record) {
+        record->name += base;
+    }
+}
+
+// Adds each given list element to its global lookup table list
+extern "C" void __offload_register_tables(
+    FuncList::Node *entry_table,   // added to __offload_entries
+    FuncList::Node *func_table,    // added to __offload_funcs
+    VarList::Node *var_table       // added to __offload_vars
+)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Registering offload function entry table %p\n",
+                           entry_table);
+    __offload_entries.add_table(entry_table);
+
+    OFFLOAD_DEBUG_TRACE(2, "Registering function table %p\n", func_table);
+    __offload_funcs.add_table(func_table);
+
+    OFFLOAD_DEBUG_TRACE(2, "Registering var table %p\n", var_table);
+    __offload_vars.add_table(var_table);
+}
+
+// Removes each given list element from its global lookup table list:
+// entry_table from __offload_entries, func_table from __offload_funcs and
+// var_table from __offload_vars. Every removal is traced.
+extern "C" void __offload_unregister_tables(
+    FuncList::Node *entry_table,
+    FuncList::Node *func_table,
+    VarList::Node *var_table
+)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Unregistering offload function entry table %p\n",
+                           entry_table);
+    __offload_entries.remove_table(entry_table);
+
+    OFFLOAD_DEBUG_TRACE(2, "Unregistering function table %p\n", func_table);
+    __offload_funcs.remove_table(func_table);
+
+    OFFLOAD_DEBUG_TRACE(2, "Unregistering var table %p\n", var_table);
+    __offload_vars.remove_table(var_table);
+}

Added: openmp/trunk/offload/src/offload_table.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_table.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_table.h (added)
+++ openmp/trunk/offload/src/offload_table.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,301 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief Function and Variable tables used by the runtime library
+*/
+
+#ifndef OFFLOAD_TABLE_H_INCLUDED
+#define OFFLOAD_TABLE_H_INCLUDED
+
+#include <iterator>
+#include "offload_util.h"
+
+// Template representing a doubly linked list of tables.
+// The list never allocates or frees nodes; it only links and unlinks the
+// nodes handed to it. Both operations run under m_lock.
+template <typename T> class TableList {
+public:
+    // table type
+    typedef T Table;
+
+    // List node
+    struct Node {
+        Table   table;
+        Node*   prev;
+        Node*   next;
+    };
+
+public:
+    // Starts the list with the given node as its head (0 for an empty list).
+    explicit TableList(Node *node = 0) : m_head(node) {}
+
+    // Inserts the node at the head of the list.
+    void add_table(Node *node) {
+        m_lock.lock();
+
+        // Set both links explicitly instead of trusting their previous
+        // contents, so a node that was used before is linked correctly.
+        node->prev = 0;
+        node->next = m_head;
+        if (m_head != 0) {
+            m_head->prev = node;
+        }
+        m_head = node;
+
+        m_lock.unlock();
+    }
+
+    // Unlinks the node from the list.
+    void remove_table(Node *node) {
+        m_lock.lock();
+
+        if (node->next != 0) {
+            node->next->prev = node->prev;
+        }
+        if (node->prev != 0) {
+            node->prev->next = node->next;
+        }
+        if (m_head == node) {
+            m_head = node->next;
+        }
+
+        // Drop the stale links: removing the node a second time then leaves
+        // the list untouched instead of relinking its former neighbours.
+        node->prev = 0;
+        node->next = 0;
+
+        m_lock.unlock();
+    }
+
+protected:
+    Node*           m_head;
+    mutex_t         m_lock;
+};
+
+// Function lookup table.
+struct FuncTable {
+    //! Function table entry
+    /*! This table contains functions created from offload regions.   */
+    /*! Each entry consists of a pointer to the function's "key"
+        and the function address.                                     */
+    /*! Each shared library or executable may contain one such table. */
+    /*! The end of the table is marked with an entry whose name field
+        has value -1.                                                 */
+    struct Entry {
+        const char* name; //!< Name of the function; name lookups and dumps skip a null name
+        void*       func; //!< Address of the function
+    };
+
+    // first entry of the table; iteration stops at the entry whose name is -1
+    const Entry *entries;
+
+    // cached max name length (terminator included); negative means not computed yet
+    int64_t max_name_len;
+};
+
+// List of function tables with name/address lookup
+class FuncList : public TableList<FuncTable> {
+public:
+    explicit FuncList(Node *node = 0) : TableList<Table>(node),
+                                        m_max_name_len(-1)
+    {}
+
+    // add table to the list
+    void add_table(Node *node) {
+        // invalidate the cached max name length; max_name_length() recomputes it
+        m_max_name_len = -1;
+
+        // add table
+        TableList<Table>::add_table(node);
+    }
+
+    // find function address for the given name
+    const void* find_addr(const char *name);
+
+    // find function name for the given address
+    const char* find_name(const void *addr);
+
+    // max name length from all tables in the list
+    int64_t max_name_length(void);
+
+    // debug dump
+    void dump(void);
+
+private:
+    // cached max name length over all tables; -1 means it must be recomputed
+    int64_t m_max_name_len;   // NOTE(review): remove_table() is inherited as is, so removal does not reset this - confirm intended
+};
+
+// Table entry for static variables
+struct VarTable {
+    //! Variable table entry
+    /*! This table contains statically allocated variables marked with
+        __declspec(target(mic)) or #pragma omp declare target.          */
+    /*! Each entry consists of a pointer to the variable's "key",
+        the variable address and its size in bytes.                     */
+    /*! Because memory allocation is done from the host,
+        the MIC table does not need the size of the variable.           */
+    /*! Padding to make the table entry size a power of 2 is necessary
+        to avoid "holes" between table contributions from different object
+        files on Windows when debug information is specified with /Zi.  */
+    struct Entry {
+        const char* name; //!< Name of the variable
+        void*       addr; //!< Address of the variable
+
+#if HOST_LIBRARY
+        uint64_t    size; //!< Size of the variable in bytes (host library only)
+
+#ifdef TARGET_WINNT
+		// padding to make entry size a power of 2
+        uint64_t    padding;
+#endif // TARGET_WINNT
+#endif
+    };
+
+    // Table terminated by an entry with name == -1
+    const Entry *entries;
+};
+
+// List of var tables
+class VarList : public TableList<VarTable> {
+public:
+    VarList() : TableList<Table>()
+    {}
+
+    // debug dump
+    void dump();
+
+public:
+    // Iterator over the named entries of all tables in the list.
+    // Entries with a null name are skipped; a default-constructed iterator
+    // (null entry pointer) acts as the end position.
+    class Iterator : public std::iterator<std::input_iterator_tag,
+                                          Table::Entry> {
+    public:
+        Iterator() : m_node(0), m_entry(0) {}
+
+        // positions the iterator on the first named entry reachable
+        // from the given node
+        explicit Iterator(Node *node) : m_node(0), m_entry(0) {
+            new_node(node);
+        }
+
+        // moves to the next named entry, continuing with the following
+        // tables when the current one is exhausted
+        Iterator& operator++() {
+            if (m_entry == 0) {
+                return *this;
+            }
+            ++m_entry;
+            skip_unnamed();
+            if (at_end_marker()) {
+                new_node(m_node->next);
+            }
+            return *this;
+        }
+
+        // iterators compare by the entry they point to
+        bool operator==(const Iterator &other) const {
+            return m_entry == other.m_entry;
+        }
+
+        bool operator!=(const Iterator &other) const {
+            return !(m_entry == other.m_entry);
+        }
+
+        // yields a pointer to the current entry
+        const Table::Entry* operator*() const {
+            return m_entry;
+        }
+
+    private:
+        // advances past entries that have a null name
+        void skip_unnamed() {
+            while (m_entry->name == 0) {
+                ++m_entry;
+            }
+        }
+
+        // true when the current entry is the end-of-table marker
+        bool at_end_marker() const {
+            return m_entry->name == reinterpret_cast<const char*>(-1);
+        }
+
+        // starting at the given node, finds the first table that has a
+        // named entry; leaves both pointers null when there is none
+        void new_node(Node *node) {
+            m_entry = 0;
+            for (m_node = node; m_node != 0; m_node = m_node->next) {
+                m_entry = m_node->table.entries;
+                skip_unnamed();
+                if (!at_end_marker()) {
+                    return;
+                }
+                m_entry = 0;
+            }
+        }
+
+    private:
+        Node                *m_node;
+        const Table::Entry  *m_entry;
+    };
+
+    Iterator begin() const {
+        return Iterator(m_head);
+    }
+
+    Iterator end() const {
+        return Iterator();
+    }
+
+public:
+    // Entry representation in a copy buffer
+    struct BufEntry {
+        intptr_t name;
+        intptr_t addr;
+    };
+
+    // Calculate the number of elements in the table and
+    // returns the size of buffer for the table
+    int64_t table_size(int64_t &nelems);
+
+    // Copy table contents to given buffer. It is supposed to be large
+    // enough to hold all elements as string table.
+    void table_copy(void *buf, int64_t nelems);
+
+    // Patch name offsets in a table after it's been copied to other side
+    static void table_patch_names(void *buf, int64_t nelems);
+};
+
+extern FuncList __offload_entries;
+extern FuncList __offload_funcs;
+extern VarList  __offload_vars;
+
+// Section names where the lookup tables are stored
+#ifdef TARGET_WINNT
+#define OFFLOAD_ENTRY_TABLE_SECTION_START   ".OffloadEntryTable$a"
+#define OFFLOAD_ENTRY_TABLE_SECTION_END     ".OffloadEntryTable$z"
+
+#define OFFLOAD_FUNC_TABLE_SECTION_START    ".OffloadFuncTable$a"
+#define OFFLOAD_FUNC_TABLE_SECTION_END      ".OffloadFuncTable$z"
+
+#define OFFLOAD_VAR_TABLE_SECTION_START     ".OffloadVarTable$a"
+#define OFFLOAD_VAR_TABLE_SECTION_END       ".OffloadVarTable$z"
+
+#define OFFLOAD_CRTINIT_SECTION_START       ".CRT$XCT"
+
+#pragma section(OFFLOAD_CRTINIT_SECTION_START, read)
+
+#else  // TARGET_WINNT
+
+#define OFFLOAD_ENTRY_TABLE_SECTION_START   ".OffloadEntryTable."
+#define OFFLOAD_ENTRY_TABLE_SECTION_END     ".OffloadEntryTable."
+
+#define OFFLOAD_FUNC_TABLE_SECTION_START    ".OffloadFuncTable."
+#define OFFLOAD_FUNC_TABLE_SECTION_END      ".OffloadFuncTable."
+
+#define OFFLOAD_VAR_TABLE_SECTION_START     ".OffloadVarTable."
+#define OFFLOAD_VAR_TABLE_SECTION_END       ".OffloadVarTable."
+#endif // TARGET_WINNT
+
+#pragma section(OFFLOAD_ENTRY_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_ENTRY_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_FUNC_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_FUNC_TABLE_SECTION_END, read, write)
+
+#pragma section(OFFLOAD_VAR_TABLE_SECTION_START, read, write)
+#pragma section(OFFLOAD_VAR_TABLE_SECTION_END, read, write)
+
+
+// register/unregister given tables
+extern "C" void __offload_register_tables(
+    FuncList::Node *entry_table,
+    FuncList::Node *func_table,
+    VarList::Node *var_table
+);
+
+extern "C" void __offload_unregister_tables(
+    FuncList::Node *entry_table,
+    FuncList::Node *func_table,
+    VarList::Node *var_table
+);
+#endif  // OFFLOAD_TABLE_H_INCLUDED

Added: openmp/trunk/offload/src/offload_target.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_target.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_target.cpp (added)
+++ openmp/trunk/offload/src/offload_target.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,754 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_target.h"
+#include <stdlib.h>
+#include <unistd.h>
+#ifdef SEP_SUPPORT
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#endif // SEP_SUPPORT
+#include <omp.h>
+#include <map>
+
+// typedef offload_func_with_parms.
+// Pointer to function that represents an offloaded entry point.
+// The parameters are a temporary fix for parameters on the stack.
+typedef void (*offload_func_with_parms)(void *);
+
+// Target console and file logging
+const char *prefix;
+int console_enabled = 0;
+int offload_report_level = 0;
+
+// Trace information
+static const char* vardesc_direction_as_string[] = {
+    "NOCOPY",
+    "IN",
+    "OUT",
+    "INOUT"
+};
+static const char* vardesc_type_as_string[] = {
+    "unknown",
+    "data",
+    "data_ptr",
+    "func_ptr",
+    "void_ptr",
+    "string_ptr",
+    "dv",
+    "dv_data",
+    "dv_data_slice",
+    "dv_ptr",
+    "dv_ptr_data",
+    "dv_ptr_data_slice",
+    "cean_var",
+    "cean_var_ptr",
+    "c_data_ptr_array"
+};
+
+int mic_index = -1;
+int mic_engines_total = -1;
+uint64_t mic_frequency = 0;
+int offload_number = 0;
+static std::map<void*, RefInfo*> ref_data;
+static mutex_t add_ref_lock;
+
+// Buffer reference bookkeeping.
+//
+// These helpers are used unconditionally by scatter_copyin_data() and
+// gather_copyout_data(), so they must be defined whether or not
+// SEP_SUPPORT is enabled.
+
+// Records one more use of 'buf' in ref_data, creating the tracking record
+// on first use.
+//   buf     - buffer address used as the map key
+//   created - true when the caller also took a buffer reference that
+//             BufReleaseRef() should later drop via BufferReleaseRef()
+static void add_ref_count(void * buf, bool created)
+{
+    mutex_locker_t locker(add_ref_lock);
+    // operator[] yields a null slot for a buffer that is not tracked yet.
+    RefInfo *& slot = ref_data[buf];
+
+    if (slot) {
+        slot->count++;
+    }
+    else {
+        slot = new RefInfo(created, 1L);
+    }
+    // Once any user has added a real reference the flag stays set until
+    // BufReleaseRef() clears it.
+    slot->is_added |= created;
+}
+
+// Drops one use of 'buf'. When the use count reaches zero and a buffer
+// reference had been added for it, that reference is released.
+// Buffers that were never registered are ignored.
+static void BufReleaseRef(void * buf)
+{
+    mutex_locker_t locker(add_ref_lock);
+    // find() rather than operator[] so that an unknown buffer does not
+    // leave an empty entry behind in the map.
+    std::map<void*, RefInfo*>::iterator it = ref_data.find(buf);
+
+    if (it != ref_data.end() && it->second) {
+        RefInfo * info = it->second;
+        --info->count;
+        if (info->count == 0 && info->is_added) {
+            BufferReleaseRef(buf);
+            info->is_added = 0;
+        }
+    }
+}
+
+#ifdef SEP_SUPPORT
+// Names of the environment variables read in __offload_target_init() and
+// the state they control.
+static const char*  sep_monitor_env = "SEP_MONITOR";
+static bool         sep_monitor = false;
+static const char*  sep_device_env = "SEP_DEVICE";
+static const char*  sep_device =  "/dev/sep3.8/c";
+// Number of offloads currently running; sampling is resumed by the first
+// one to start and paused by the last one to finish (see offload()).
+static int          sep_counter = 0;
+
+#define SEP_API_IOC_MAGIC   99
+#define SEP_IOCTL_PAUSE     _IO (SEP_API_IOC_MAGIC, 31)
+#define SEP_IOCTL_RESUME    _IO (SEP_API_IOC_MAGIC, 32)
+
+// Opens the SEP device and issues the PAUSE ioctl.
+// Returns the ioctl() result, or -1 if the device could not be opened.
+static int VTPauseSampling(void)
+{
+    int ret = -1;
+    int handle = open(sep_device, O_RDWR);
+    // open() reports failure with -1; descriptor 0 is a valid result.
+    if (handle >= 0) {
+        ret = ioctl(handle, SEP_IOCTL_PAUSE);
+        close(handle);
+    }
+    return ret;
+}
+
+// Opens the SEP device and issues the RESUME ioctl.
+// Returns the ioctl() result, or -1 if the device could not be opened.
+static int VTResumeSampling(void)
+{
+    int ret = -1;
+    int handle = open(sep_device, O_RDWR);
+    // open() reports failure with -1; descriptor 0 is a valid result.
+    if (handle >= 0) {
+        ret = ioctl(handle, SEP_IOCTL_RESUME);
+        close(handle);
+    }
+    return ret;
+}
+#endif // SEP_SUPPORT
+
+// Runs one offloaded region on the target.
+//
+//   buffer_count    - number of entries in 'buffers'
+//   buffers         - buffer addresses passed with the call; when the
+//                     function descriptor carries no inline data
+//                     (data_offset == 0) the last entry is the combined
+//                     input/output data buffer
+//   misc_data       - FunctionDescriptor, optionally followed by inline
+//                     input data at func->data_offset
+//   misc_data_len   - not used here
+//   return_data     - output area used when input data is inline
+//   return_data_len - not used here
+//
+// The process exits if the named entry point cannot be found and aborts
+// if memory for the variable descriptors cannot be allocated.
+void OffloadDescriptor::offload(
+    uint32_t  buffer_count,
+    void**    buffers,
+    void*     misc_data,
+    uint16_t  misc_data_len,
+    void*     return_data,
+    uint16_t  return_data_len
+)
+{
+    FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
+    const char *name = func->data;
+    OffloadDescriptor ofld;
+    char *in_data = 0;
+    char *out_data = 0;
+    char *timer_data = 0;
+
+    // propagate per-offload settings sent by the host to the globals
+    console_enabled = func->console_enabled;
+    timer_enabled = func->timer_enabled;
+    offload_report_level = func->offload_report_level;
+    offload_number = func->offload_number;
+    ofld.set_offload_number(func->offload_number);
+
+#ifdef SEP_SUPPORT
+    if (sep_monitor) {
+        // the first offload to become active resumes sampling
+        if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
+            OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
+            VTResumeSampling();
+        }
+    }
+#endif // SEP_SUPPORT
+
+    OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
+                          c_offload_start_target_func,
+                          "Offload \"%s\" started\n", name);
+
+    // initialize timer data
+    OFFLOAD_TIMER_INIT();
+
+    OFFLOAD_TIMER_START(c_offload_target_total_time);
+
+    OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
+
+    // get input/output buffer addresses
+    if (func->in_datalen > 0 || func->out_datalen > 0) {
+        if (func->data_offset != 0) {
+            // data travels inline with the function descriptor
+            in_data = (char*) misc_data + func->data_offset;
+            out_data = (char*) return_data;
+        }
+        else {
+            // data travels in the last buffer, which is then removed from
+            // the set of buffers handed to the descriptor
+            char *inout_buf = (char*) buffers[--buffer_count];
+            in_data = inout_buf;
+            out_data = inout_buf;
+        }
+    }
+
+    // assign variable descriptors
+    ofld.m_vars_total = func->vars_num;
+    if (ofld.m_vars_total > 0) {
+        uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
+
+        ofld.m_vars = (VarDesc*) malloc(var_data_len);
+        if (ofld.m_vars == 0) {
+            // out of memory: stop here instead of letting memcpy write
+            // through a null pointer
+            abort();
+        }
+        memcpy(ofld.m_vars, in_data, var_data_len);
+
+        // descriptors occupy the front of the input data
+        in_data += var_data_len;
+        func->in_datalen -= var_data_len;
+    }
+
+    // timer data
+    if (func->timer_enabled) {
+        uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
+
+        // timer results occupy the front of the output data
+        timer_data = out_data;
+        out_data += timer_data_len;
+        func->out_datalen -= timer_data_len;
+    }
+
+    // init Marshallers
+    ofld.m_in.init_buffer(in_data, func->in_datalen);
+    ofld.m_out.init_buffer(out_data, func->out_datalen);
+
+    // copy buffers to offload descriptor
+    std::copy(buffers, buffers + buffer_count,
+              std::back_inserter(ofld.m_buffers));
+
+    OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
+
+    // find offload entry address
+    OFFLOAD_TIMER_START(c_offload_target_func_lookup);
+
+    offload_func_with_parms entry = (offload_func_with_parms)
+        __offload_entries.find_addr(name);
+
+    if (entry == NULL) {
+#if OFFLOAD_DEBUG > 0
+        if (console_enabled > 2) {
+            __offload_entries.dump();
+        }
+#endif
+        LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
+        exit(1);
+    }
+
+    OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
+
+    OFFLOAD_TIMER_START(c_offload_target_func_time);
+
+    // execute offload entry
+    entry(&ofld);
+
+    OFFLOAD_TIMER_STOP(c_offload_target_func_time);
+
+    OFFLOAD_TIMER_STOP(c_offload_target_total_time);
+
+    // copy timer data to the buffer
+    OFFLOAD_TIMER_TARGET_DATA(timer_data);
+
+    OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
+
+#ifdef SEP_SUPPORT
+    if (sep_monitor) {
+        // the last offload to finish pauses sampling again
+        if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
+            OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
+            VTPauseSampling();
+        }
+    }
+#endif // SEP_SUPPORT
+}
+
+// Combines the variable descriptors received from the host (m_vars) with
+// the ones supplied by the caller.
+//
+//   vars       - local descriptors; supply the 'ptr' and 'into' addresses
+//   vars2      - optional (may be NULL) source/destination names, used
+//                only for tracing
+//   vars_total - number of entries in 'vars' (and 'vars2' when present)
+//
+// The process exits if the host sent fewer descriptors than 'vars_total'
+// or if the type bits of a pair of descriptors differ.
+void OffloadDescriptor::merge_var_descs(
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total
+)
+{
+    // the host has to provide at least one descriptor for every local one
+    if (vars_total > m_vars_total) {
+        LIBOFFLOAD_ERROR(c_merge_var_descs1);
+        exit(1);
+    }
+
+    for (int idx = 0; idx < m_vars_total; ++idx) {
+        const bool has_local_desc = (idx < vars_total);
+
+        if (has_local_desc) {
+            // both sides must agree on the variable type
+            if (vars[idx].type.bits != m_vars[idx].type.bits) {
+                LIBOFFLOAD_ERROR(c_merge_var_descs2);
+                exit(1);
+            }
+
+            // addresses are only known locally
+            m_vars[idx].ptr = vars[idx].ptr;
+            m_vars[idx].into = vars[idx].into;
+
+            // source name for the trace, empty when none was supplied
+            const char *src_name =
+                (vars2 != NULL && vars2[idx].sname != NULL) ?
+                vars2[idx].sname : "";
+
+            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
+                "   VarDesc %d, var=%s, %s, %s\n",
+                idx, src_name,
+                vardesc_direction_as_string[m_vars[idx].direction.bits],
+                vardesc_type_as_string[m_vars[idx].type.src]);
+
+            if (vars2 != NULL) {
+                if (vars2[idx].dname != NULL) {
+                    OFFLOAD_TRACE(2, "              into=%s, %s\n",
+                        vars2[idx].dname,
+                        vardesc_type_as_string[m_vars[idx].type.dst]);
+                }
+            }
+        }
+
+        // full dump of the descriptor, printed for every host descriptor
+        OFFLOAD_TRACE(2,
+            "              type_src=%d, type_dstn=%d, direction=%d, "
+            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
+            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
+            m_vars[idx].type.src,
+            m_vars[idx].type.dst,
+            m_vars[idx].direction.bits,
+            m_vars[idx].alloc_if,
+            m_vars[idx].free_if,
+            m_vars[idx].align,
+            m_vars[idx].mic_offset,
+            m_vars[idx].flags.bits,
+            m_vars[idx].offset,
+            m_vars[idx].size,
+            m_vars[idx].count,
+            m_vars[idx].ptr,
+            m_vars[idx].into);
+    }
+}
+
+// Unpacks the input marshaller (m_in) into the target-side variables.
+//
+// For every variable descriptor this
+//   1. picks the pointer slot and type to work on (source or destination
+//      side, depending on direction.out / into),
+//   2. sets pointer-like variables to the buffer assigned to them, taking
+//      a buffer reference where one is needed,
+//   3. releases buffers listed for release with a stack-buffer variable,
+//   4. copies by-value data from m_in into the variable.
+//
+// Aborts on an unknown variable type. Starts the compute timer on exit.
+void OffloadDescriptor::scatter_copyin_data()
+{
+    OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
+
+    OFFLOAD_DEBUG_TRACE(2, "IN  buffer @ %p size %lld\n",
+                        m_in.get_buffer_start(),
+                        m_in.get_buffer_size());
+    OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
+                             m_in.get_buffer_size());
+
+    // receive data
+    for (int i = 0; i < m_vars_total; i++) {
+        bool src_is_for_mic = (m_vars[i].direction.out ||
+                               m_vars[i].into == NULL);
+        void** ptr_addr = src_is_for_mic ?
+                          static_cast<void**>(m_vars[i].ptr) :
+                          static_cast<void**>(m_vars[i].into);
+        int type = src_is_for_mic ? m_vars[i].type.src :
+                                    m_vars[i].type.dst;
+        bool is_static = src_is_for_mic ?
+                         m_vars[i].flags.is_static :
+                         m_vars[i].flags.is_static_dstn;
+        void *ptr = NULL;
+
+        if (m_vars[i].flags.alloc_disp) {
+            // the displacement is sent separately and stored negated
+            int64_t offset = 0;
+            m_in.receive_data(&offset, sizeof(offset));
+            m_vars[i].offset = -offset;
+        }
+        if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
+            VAR_TYPE_IS_DV_DATA(type)) {
+            // for dope-vector data the pointer to set is the Base field of
+            // the array descriptor
+            ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
+                  reinterpret_cast<ArrDesc*>(ptr_addr) :
+                  *reinterpret_cast<ArrDesc**>(ptr_addr);
+            ptr_addr = reinterpret_cast<void**>(&dvp->Base);
+        }
+
+        // Set pointer values
+        switch (type) {
+            case c_data_ptr_array:
+                {
+                    // fix up the addresses of the descriptors that belong
+                    // to this pointer array
+                    int j = m_vars[i].ptr_arr_offset;
+                    int max_el = j + m_vars[i].count;
+                    char *dst_arr_ptr = (src_is_for_mic)?
+                        *(reinterpret_cast<char**>(m_vars[i].ptr)) :
+                        reinterpret_cast<char*>(m_vars[i].into);
+
+                    for (; j < max_el; j++) {
+                        if (src_is_for_mic) {
+                            m_vars[j].ptr =
+                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
+                        }
+                        else {
+                            m_vars[j].into =
+                                dst_arr_ptr + m_vars[j].ptr_arr_offset;
+                        }
+                    }
+                }
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+            case c_dv:
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].alloc_if) {
+                    void *buf;
+                    if (m_vars[i].flags.sink_addr) {
+                        // address is sent in the data stream
+                        m_in.receive_data(&buf, sizeof(buf));
+                    }
+                    else {
+                        // address is the next buffer passed with the call
+                        buf = m_buffers.front();
+                        m_buffers.pop_front();
+                    }
+                    if (buf) {
+                        if (!is_static) {
+                            if (!m_vars[i].flags.sink_addr) {
+                                // increment buffer reference
+                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
+                                BufferAddRef(buf);
+                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
+                            }
+                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
+                        }
+                        ptr = static_cast<char*>(buf) +
+                                  m_vars[i].mic_offset +
+                                  (m_vars[i].flags.is_stack_buf ?
+                                   0 : m_vars[i].offset);
+                    }
+                    *ptr_addr = ptr;
+                }
+                else if (m_vars[i].flags.sink_addr) {
+                    void *buf;
+                    m_in.receive_data(&buf, sizeof(buf));
+                    ptr = static_cast<char*>(buf) +
+                              m_vars[i].mic_offset +
+                              (m_vars[i].flags.is_stack_buf ?
+                               0 : m_vars[i].offset);
+                    *ptr_addr = ptr;
+                }
+                break;
+
+            case c_func_ptr:
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].alloc_if) {
+                    void *buf;
+                    if (m_vars[i].flags.sink_addr) {
+                        m_in.receive_data(&buf, sizeof(buf));
+                    }
+                    else {
+                        buf = m_buffers.front();
+                        m_buffers.pop_front();
+                    }
+                    if (buf) {
+                        if (!is_static) {
+                            if (!m_vars[i].flags.sink_addr) {
+                                // increment buffer reference
+                                OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
+                                BufferAddRef(buf);
+                                OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
+                            }
+                            add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
+                        }
+                        ptr = static_cast<char*>(buf) +
+                            m_vars[i].mic_offset + m_vars[i].offset;
+                    }
+                    *ptr_addr = ptr;
+                }
+                else if (m_vars[i].flags.sink_addr) {
+                    void *buf;
+                    m_in.receive_data(&buf, sizeof(buf));
+                    ptr = static_cast<char*>(buf) +
+                          m_vars[i].mic_offset + m_vars[i].offset;
+                    *ptr_addr = ptr;
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, type);
+                abort();
+        }
+        // Release obsolete buffers for stack of persistent objects.
+        // This must be a comparison: an assignment here would make the
+        // branch run for every variable type with matching flags and
+        // consume data from m_in that belongs to later variables.
+        if (type == c_data_ptr &&
+            m_vars[i].flags.is_stack_buf &&
+            !m_vars[i].direction.bits &&
+            m_vars[i].alloc_if &&
+            m_vars[i].size != 0) {
+                for (int j=0; j < m_vars[i].size; j++) {
+                    void *buf;
+                    m_in.receive_data(&buf, sizeof(buf));
+                    BufferReleaseRef(buf);
+                    {
+                        // ref_data is shared with add_ref_count() and
+                        // BufReleaseRef(), which access it under this lock
+                        mutex_locker_t locker(add_ref_lock);
+                        std::map<void*, RefInfo*>::iterator it =
+                            ref_data.find(buf);
+                        if (it != ref_data.end()) {
+                            // the map owns the tracking record
+                            delete it->second;
+                            ref_data.erase(it);
+                        }
+                    }
+                }
+        }
+        // Do copyin
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.in &&
+                    !m_vars[i].flags.is_static_dstn) {
+                    int64_t size;
+                    int64_t disp;
+                    char* ptr = m_vars[i].into ?
+                                 static_cast<char*>(m_vars[i].into) :
+                                 static_cast<char*>(m_vars[i].ptr);
+                    if (m_vars[i].type.dst == c_cean_var) {
+                        // size and displacement precede the data
+                        m_in.receive_data((&size), sizeof(int64_t));
+                        m_in.receive_data((&disp), sizeof(int64_t));
+                    }
+                    else {
+                        size = m_vars[i].size;
+                        disp = 0;
+                    }
+                    m_in.receive_data(ptr + disp, size);
+                }
+                break;
+
+            case c_dv:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    char* ptr = m_vars[i].into ?
+                                 static_cast<char*>(m_vars[i].into) :
+                                 static_cast<char*>(m_vars[i].ptr);
+                    // the first 8 bytes of the descriptor are not
+                    // overwritten
+                    m_in.receive_data(ptr + sizeof(uint64_t),
+                                      m_vars[i].size - sizeof(uint64_t));
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                break;
+
+            case c_func_ptr:
+                if (m_vars[i].direction.in) {
+                    m_in.receive_func_ptr((const void**) m_vars[i].ptr);
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+                abort();
+        }
+    }
+
+    OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
+                  m_in.get_tfr_size());
+
+    OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
+
+    OFFLOAD_TIMER_START(c_offload_target_compute);
+}
+
+// Packs output data into the output marshaller (m_out) and drops the
+// buffer references that are due for release.
+//
+// For every variable descriptor the source side is handled first
+// (sending by-value data and function pointers, releasing buffers when
+// free_if is set), then, if the variable has an 'into' destination, the
+// destination side (releasing buffers only).
+//
+// Stops the compute timer on entry. Aborts on an unknown variable type.
+void OffloadDescriptor::gather_copyout_data()
+{
+    OFFLOAD_TIMER_STOP(c_offload_target_compute);
+
+    OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
+
+    for (int i = 0; i < m_vars_total; i++) {
+        bool src_is_for_mic = (m_vars[i].direction.out ||
+                               m_vars[i].into == NULL);
+
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.out &&
+                    !m_vars[i].flags.is_static) {
+                    m_out.send_data(
+                        static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
+                        m_vars[i].size);
+                }
+                break;
+
+            case c_dv:
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].free_if &&
+                    src_is_for_mic &&
+                    !m_vars[i].flags.is_static) {
+                    // undo the offsets applied when the pointer was set
+                    // to get back to the buffer address
+                    void *buf = *static_cast<char**>(m_vars[i].ptr) -
+                                    m_vars[i].mic_offset -
+                                    (m_vars[i].flags.is_stack_buf?
+                                     0 : m_vars[i].offset);
+                    if (buf == NULL) {
+                        break;
+                    }
+                    // decrement buffer reference count
+                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
+                    BufReleaseRef(buf);
+                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
+                }
+                break;
+
+            case c_func_ptr:
+                if (m_vars[i].direction.out) {
+                    m_out.send_func_ptr(*((void**) m_vars[i].ptr));
+                }
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (src_is_for_mic &&
+                    m_vars[i].free_if &&
+                    !m_vars[i].flags.is_static) {
+                    ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
+                                    m_vars[i].type.src == c_dv_data_slice) ?
+                        static_cast<ArrDesc*>(m_vars[i].ptr) :
+                        *static_cast<ArrDesc**>(m_vars[i].ptr);
+
+                    void *buf = reinterpret_cast<char*>(dvp->Base) -
+                                m_vars[i].mic_offset -
+                                m_vars[i].offset;
+
+                    if (buf == NULL) {
+                        break;
+                    }
+
+                    // decrement buffer reference count
+                    OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
+                    BufReleaseRef(buf);
+                    OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
+                }
+                break;
+
+            default:
+                // report the value that was actually switched on
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+                abort();
+        }
+
+        if (m_vars[i].into) {
+            switch (m_vars[i].type.dst) {
+                case c_data_ptr_array:
+                    break;
+                case c_data:
+                case c_void_ptr:
+                case c_cean_var:
+                case c_dv:
+                    break;
+
+                case c_string_ptr:
+                case c_data_ptr:
+                case c_cean_var_ptr:
+                case c_dv_ptr:
+                    if (m_vars[i].direction.in &&
+                        m_vars[i].free_if &&
+                        !m_vars[i].flags.is_static_dstn) {
+                        void *buf = *static_cast<char**>(m_vars[i].into) -
+                                    m_vars[i].mic_offset -
+                                    (m_vars[i].flags.is_stack_buf?
+                                     0 : m_vars[i].offset);
+
+                        if (buf == NULL) {
+                            break;
+                        }
+                        // decrement buffer reference count
+                        OFFLOAD_TIMER_START(
+                            c_offload_target_release_buffer_refs);
+                        BufReleaseRef(buf);
+                        OFFLOAD_TIMER_STOP(
+                            c_offload_target_release_buffer_refs);
+                    }
+                    break;
+
+                case c_func_ptr:
+                    break;
+
+                case c_dv_data:
+                case c_dv_ptr_data:
+                case c_dv_data_slice:
+                case c_dv_ptr_data_slice:
+                    if (m_vars[i].free_if &&
+                        m_vars[i].direction.in &&
+                        !m_vars[i].flags.is_static_dstn) {
+                        ArrDesc *dvp =
+                            (m_vars[i].type.dst == c_dv_data_slice ||
+                             m_vars[i].type.dst == c_dv_data) ?
+                            static_cast<ArrDesc*>(m_vars[i].into) :
+                            *static_cast<ArrDesc**>(m_vars[i].into);
+                        void *buf = reinterpret_cast<char*>(dvp->Base) -
+                              m_vars[i].mic_offset -
+                              m_vars[i].offset;
+
+                        if (buf == NULL) {
+                            break;
+                        }
+                        // decrement buffer reference count
+                        OFFLOAD_TIMER_START(
+                            c_offload_target_release_buffer_refs);
+                        BufReleaseRef(buf);
+                        OFFLOAD_TIMER_STOP(
+                            c_offload_target_release_buffer_refs);
+                    }
+                    break;
+
+                default:
+                    LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+                    abort();
+            }
+        }
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
+                        m_out.get_buffer_start(),
+                        m_out.get_buffer_size());
+
+    OFFLOAD_DEBUG_DUMP_BYTES(2,
+                             m_out.get_buffer_start(),
+                             m_out.get_buffer_size());
+
+    OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
+                  "Total copyout data sent to host: [%lld] bytes\n",
+                  m_out.get_tfr_size());
+
+    OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
+}
+
+// One-time target-side setup: reads the optional SEP environment
+// overrides (when built with SEP_SUPPORT), sets the message prefix and
+// caches the cycle frequency reported by COI.
+void __offload_target_init(void)
+{
+#ifdef SEP_SUPPORT
+    // a non-empty SEP_MONITOR value switches monitoring on or off
+    const char* monitor_value = getenv(sep_monitor_env);
+    if (monitor_value != 0 && monitor_value[0] != '\0') {
+        sep_monitor = atoi(monitor_value);
+    }
+
+    // a non-empty SEP_DEVICE value replaces the default device path
+    const char* device_value = getenv(sep_device_env);
+    if (device_value != 0 && device_value[0] != '\0') {
+        sep_device = device_value;
+    }
+#endif // SEP_SUPPORT
+
+    prefix = report_get_message_str(c_report_mic);
+
+    // remember the device frequency
+    mic_frequency = COIPerfGetCycleFrequency();
+}
+
+// User-visible offload API
+
+// Returns the value of mic_engines_total, which is initialized to -1 in
+// this file.
+int _Offload_number_of_devices(void)
+{
+    return mic_engines_total;
+}
+
+// Returns the value of mic_index, which is initialized to -1 in this
+// file.
+int _Offload_get_device_number(void)
+{
+    return mic_index;
+}
+
+// Returns the engine index obtained from EngineGetIndex(), converted to
+// int. The status of that call is not examined.
+int _Offload_get_physical_device_number(void)
+{
+    uint32_t engine_index;
+    EngineGetIndex(&engine_index);
+    return engine_index;
+}

Added: openmp/trunk/offload/src/offload_target.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_target.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_target.h (added)
+++ openmp/trunk/offload/src/offload_target.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,100 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The parts of the offload library used only on the target
+
+#ifndef OFFLOAD_TARGET_H_INCLUDED
+#define OFFLOAD_TARGET_H_INCLUDED
+
+#include "offload_common.h"
+#include "coi/coi_server.h"
+
+// The offload descriptor.
+// Target-side state of one offloaded region: the input and output
+// marshallers, the buffers passed with the call and the variable
+// descriptors received from the host.
+//
+// The object owns m_vars (released with free() in the destructor), so it
+// must not be copied: a copy would free the same array twice.
+class OffloadDescriptor
+{
+public:
+    // Releases the variable descriptor array, if one was allocated.
+    ~OffloadDescriptor() {
+        if (m_vars != 0) {
+            free(m_vars);
+        }
+    }
+
+    // Entry point for COI. Synchronously execute offloaded region given
+    // the provided buffers, misc and return data.
+    static void offload(
+        uint32_t  buffer_count,
+        void**    buffers,
+        void*     misc_data,
+        uint16_t  misc_data_len,
+        void*     return_data,
+        uint16_t  return_data_len
+    );
+
+    // scatters input data from in buffer to target variables
+    void scatter_copyin_data();
+
+    // gathers output data to the buffer
+    void gather_copyout_data();
+
+    // merges local variable descriptors with the descriptors received from
+    // host
+    void merge_var_descs(VarDesc *vars, VarDesc2 *vars2, int vars_total);
+
+    // Number assigned to this offload with set_offload_number()
+    // (0 until then).
+    int get_offload_number() const {
+        return m_offload_number;
+    }
+
+    void set_offload_number(int number) {
+        m_offload_number = number;
+    }
+
+private:
+    // Constructor. Private: descriptors are created only inside offload().
+    // Every scalar member gets a defined value so that the getters and
+    // the loops over m_vars_total never read an indeterminate value.
+    OffloadDescriptor() : m_vars(0), m_vars_total(0), m_offload_number(0)
+    {}
+
+private:
+    typedef std::list<void*> BufferList;
+
+    // The Marshaller for the inputs of the offloaded region.
+    Marshaller m_in;
+
+    // The Marshaller for the outputs of the offloaded region.
+    Marshaller m_out;
+
+    // List of buffers that are passed to dispatch call
+    BufferList m_buffers;
+
+    // Variable descriptors received from host
+    VarDesc* m_vars;
+    // Number of entries in m_vars
+    int      m_vars_total;
+    // Number of this offload, used in trace output
+    int      m_offload_number;
+};
+
+// one time target initialization in main
+extern void __offload_target_init(void);
+
+// logical device index
+extern int mic_index;
+
+// total number of available logical devices
+extern int mic_engines_total;
+
+// device frequency (from COI)
+extern uint64_t mic_frequency;
+
+// Per-buffer usage record kept in the target's buffer map.
+struct RefInfo {
+    // true while a buffer reference taken for this buffer is still held
+    bool is_added;
+    // number of outstanding uses of the buffer
+    long count;
+
+    // is_add  - initial value of is_added
+    // amount  - initial use count
+    RefInfo(bool is_add, long amount)
+        : is_added(is_add),
+          count(amount)
+    {}
+};
+
+#endif // OFFLOAD_TARGET_H_INCLUDED

Added: openmp/trunk/offload/src/offload_target_main.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_target_main.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_target_main.cpp (added)
+++ openmp/trunk/offload/src/offload_target_main.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+extern "C" void __offload_target_main(void);
+
+// Entry point of the target-side executable: runs the offload runtime's
+// main routine and exits with status 0 once it returns.
+int main(int argc, char ** argv)
+{
+    // the command line is not used
+    (void) argc;
+    (void) argv;
+
+    __offload_target_main();
+
+    return 0;
+}

Added: openmp/trunk/offload/src/offload_timer.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_timer.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_timer.h (added)
+++ openmp/trunk/offload/src/offload_timer.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,172 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_TIMER_H_INCLUDED
+#define OFFLOAD_TIMER_H_INCLUDED
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include "liboffload_error_codes.h"
+
+extern int timer_enabled;
+
+#ifdef TIMING_SUPPORT
+
+// Timing data collected on the target for one offload.
+struct OffloadTargetTimerData {
+    // Tick rate used to convert the totals below into seconds (the report
+    // code divides total by frequency).
+    uint64_t frequency;
+    // One slot per target phase, indexed by the phase number.
+    struct {
+        // Timestamp-counter value captured when the phase was last started.
+        uint64_t start;
+        // Accumulated ticks over all start/stop pairs of the phase.
+        uint64_t total;
+    } phases[c_offload_target_max_phase];
+};
+
+// Host-side record for one offload; records are chained into a singly linked
+// list through `next` by offload_timer_init().
+struct OffloadHostTimerData {
+    // source file name and line number
+    // (the pointer is stored as given to offload_timer_init(); it is not
+    // copied)
+    const char* file;
+    int         line;
+
+    // host timer data
+    // One slot per host phase: `start` holds the timestamp-counter value of
+    // the most recent start, `total` the ticks accumulated so far.
+    struct {
+        uint64_t start;
+        uint64_t total;
+    } phases[c_offload_host_max_phase];
+
+    // Running byte counts, incremented by offload_timer_fill_host_sdata()
+    // and offload_timer_fill_host_rdata() respectively.
+    uint64_t sent_bytes;
+    uint64_t received_bytes;
+    // Set by offload_timer_fill_host_mic_num().
+    int card_number;
+    // Assigned once in offload_timer_init().
+    int offload_number;
+
+    // target timer data
+    // (filled from the buffer passed to offload_timer_fill_target_data())
+    OffloadTargetTimerData target;
+
+    // next element
+    OffloadHostTimerData *next;
+};
+
+#if HOST_LIBRARY
+
+// Report configuration, defined in offload_timer_host.cpp: the verbosity
+// level and a separate on/off switch. Reporting is active only when both are
+// non-zero.
+extern int offload_report_level;
+extern int offload_report_enabled;
+// Values compared against offload_report_level.
+#define OFFLOAD_REPORT_1 1
+#define OFFLOAD_REPORT_2 2
+#define OFFLOAD_REPORT_3 3
+// NOTE(review): presumably the values stored in offload_report_enabled --
+// confirm against the code that sets it.
+#define OFFLOAD_REPORT_ON 1
+#define OFFLOAD_REPORT_OFF 0
+
+// Size in bytes of the timer block exchanged with the target: one uint64_t
+// for the frequency plus one per target phase, or 0 when neither timing nor
+// reporting is active.
+#define OFFLOAD_TIMER_DATALEN() \
+    ((timer_enabled || (offload_report_level && offload_report_enabled)) ? \
+     ((1 + c_offload_target_max_phase) * sizeof(uint64_t)) : 0)
+
+// Starts the given host phase when timing or reporting is active.
+// NOTE(review): expands to a bare `if` statement, so using it directly before
+// an `else` changes which `if` that `else` binds to.
+#define OFFLOAD_TIMER_START(timer_data, pnode) \
+    if (timer_enabled || \
+        (offload_report_level && offload_report_enabled)) { \
+        offload_timer_start(timer_data, pnode); \
+    }
+
+// Stops the given host phase when timing or reporting is active.
+#define OFFLOAD_TIMER_STOP(timer_data, pnode) \
+    if (timer_enabled || \
+        (offload_report_level && offload_report_enabled)) { \
+        offload_timer_stop(timer_data, pnode); \
+    }
+
+// Unconditionally calls offload_timer_init(), which itself decides whether a
+// record is allocated. The expansion already ends with a semicolon.
+#define OFFLOAD_TIMER_INIT(file, line) \
+    offload_timer_init(file, line);
+
+// Copies the timer block received from the target into the host record when
+// timing or reporting is active.
+#define OFFLOAD_TIMER_TARGET_DATA(timer_data, data) \
+    if (timer_enabled || \
+        (offload_report_level && offload_report_enabled)) { \
+        offload_timer_fill_target_data(timer_data, data); \
+    }
+
+// The three macros below are gated on reporting only; timer_enabled alone
+// does not trigger them.
+
+// Adds `data` to the record's sent-bytes counter.
+#define OFFLOAD_TIMER_HOST_SDATA(timer_data, data) \
+    if (offload_report_level && offload_report_enabled) { \
+        offload_timer_fill_host_sdata(timer_data, data); \
+    }
+
+// Adds `data` to the record's received-bytes counter.
+#define OFFLOAD_TIMER_HOST_RDATA(timer_data, data) \
+    if (offload_report_level && offload_report_enabled) { \
+        offload_timer_fill_host_rdata(timer_data, data); \
+    }
+
+// Stores `data` as the record's card number.
+#define OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, data) \
+    if (offload_report_level && offload_report_enabled) { \
+        offload_timer_fill_host_mic_num(timer_data, data); \
+    }
+
+// Host-side timer API, implemented in offload_timer_host.cpp.
+
+// Record the start / accumulate the elapsed ticks of host phase t_node.
+extern void offload_timer_start(OffloadHostTimerData *,
+                                OffloadHostPhase t_node);
+extern void offload_timer_stop(OffloadHostTimerData *,
+                               OffloadHostPhase t_node);
+// Creates the record for one offload; returns NULL when neither timing nor
+// reporting is active.
+extern OffloadHostTimerData * offload_timer_init(const char *file, int line);
+// Copies the target frequency and per-phase totals out of `data`.
+extern void offload_timer_fill_target_data(OffloadHostTimerData *,
+                                           void *data);
+// Add to the record's sent / received byte counters.
+extern void offload_timer_fill_host_sdata(OffloadHostTimerData *,
+                                          uint64_t sent_bytes);
+extern void offload_timer_fill_host_rdata(OffloadHostTimerData *,
+                                          uint64_t received_bytes);
+// Stores the card number in the record.
+extern void offload_timer_fill_host_mic_num(OffloadHostTimerData *,
+                                            int card_number);
+
+// Utility structure for starting/stopping timer
+// Scoped timer: the phase is started when the object is constructed and
+// stopped when it is destroyed, so the timed region is the object's lifetime.
+// Both operations go through the OFFLOAD_TIMER_START/STOP macros and are
+// therefore skipped when timing and reporting are inactive.
+struct OffloadTimer {
+private:
+    // Record being updated and the phase slot within it.
+    OffloadHostTimerData*   m_data;
+    OffloadHostPhase        m_phase;
+
+public:
+    OffloadTimer(OffloadHostTimerData *data, OffloadHostPhase phase)
+        : m_data(data), m_phase(phase)
+    {
+        OFFLOAD_TIMER_START(m_data, m_phase);
+    }
+
+    ~OffloadTimer()
+    {
+        OFFLOAD_TIMER_STOP(m_data, m_phase);
+    }
+};
+
+#else
+
+// Target-side variants: there is no explicit record argument (the
+// implementation keeps its state in a file-local thread-local variable) and
+// only timer_enabled is consulted.
+
+// Size in bytes of the timer block sent to the host: one uint64_t for the
+// frequency plus one per target phase, or 0 when timing is off.
+#define OFFLOAD_TIMER_DATALEN() \
+    ((timer_enabled) ? \
+     ((1 + c_offload_target_max_phase) * sizeof(uint64_t)) : 0)
+
+// NOTE(review): each macro below expands to a bare `if` that already ends in
+// a semicolon, so it interacts badly with a following `else`.
+
+// Starts the given target phase when timing is on.
+#define OFFLOAD_TIMER_START(pnode) \
+    if (timer_enabled) offload_timer_start(pnode);
+
+// Stops the given target phase when timing is on.
+#define OFFLOAD_TIMER_STOP(pnode) \
+    if (timer_enabled) offload_timer_stop(pnode);
+
+// Clears the calling thread's timer state when timing is on.
+#define OFFLOAD_TIMER_INIT() \
+    if (timer_enabled) offload_timer_init();
+
+// Writes the frequency and per-phase totals into `data` when timing is on.
+#define OFFLOAD_TIMER_TARGET_DATA(data) \
+    if (timer_enabled) offload_timer_fill_target_data(data);
+
+// Implemented in offload_timer_target.cpp.
+extern void offload_timer_start(OffloadTargetPhase t_node);
+extern void offload_timer_stop(OffloadTargetPhase t_node);
+extern void offload_timer_init(void);
+extern void offload_timer_fill_target_data(void *data);
+
+#endif // HOST_LIBRARY
+
+#else // TIMING_SUPPORT
+
+// Without TIMING_SUPPORT the timer macros expand to nothing and the timer
+// block has zero length.
+// NOTE(review): OFFLOAD_TIMER_HOST_SDATA, OFFLOAD_TIMER_HOST_RDATA,
+// OFFLOAD_TIMER_HOST_MIC_NUM and struct OffloadTimer have no counterpart in
+// this branch; code using them needs TIMING_SUPPORT defined -- confirm that
+// this is intended.
+#define OFFLOAD_TIMER_START(...)
+#define OFFLOAD_TIMER_STOP(...)
+#define OFFLOAD_TIMER_INIT(...)
+#define OFFLOAD_TIMER_TARGET_DATA(...)
+#define OFFLOAD_TIMER_DATALEN(...)      (0)
+
+#endif // TIMING_SUPPORT
+
+#endif // OFFLOAD_TIMER_H_INCLUDED

Added: openmp/trunk/offload/src/offload_timer_host.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_timer_host.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_timer_host.cpp (added)
+++ openmp/trunk/offload/src/offload_timer_host.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,359 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_timer.h"
+
+#ifdef __INTEL_COMPILER
+#include <ia32intrin.h>
+#else // __INTEL_COMPILER
+#include <x86intrin.h>
+#endif // __INTEL_COMPILER
+
+#include "offload_host.h"
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+
+int timer_enabled = 0;
+
+#ifdef TIMING_SUPPORT
+
+int offload_report_level = 0;
+int offload_report_enabled = 1;
+
+// Number of leading spaces printed before each host phase name in the timing
+// table; indexed by the phase number given to offload_host_phase_name().
+// A larger value nests the phase visually under the preceding, less indented
+// entry.
+// NOTE(review): the per-entry labels assume the order of the host phase enum,
+// which is declared outside this file -- confirm.
+static const int host_timer_prefix_spaces[] = {
+    /*entry 0; presumably c_offload_host_total_offload -- TODO confirm*/ 0,
+    /*c_offload_host_initialize*/            2,
+    /*c_offload_host_target_acquire*/        2,
+    /*c_offload_host_wait_deps*/             2,
+    /*c_offload_host_setup_buffers*/         2,
+    /*c_offload_host_alloc_buffers*/         4,
+    /*c_offload_host_setup_misc_data*/       2,
+    /*c_offload_host_alloc_data_buffer*/     4,
+    /*c_offload_host_send_pointers*/         2,
+    /*c_offload_host_gather_inputs*/         2,
+    /*c_offload_host_map_in_data_buffer*/    4,
+    /*c_offload_host_unmap_in_data_buffer*/  4,
+    /*c_offload_host_start_compute*/         2,
+    /*c_offload_host_wait_compute*/          2,
+    /*c_offload_host_start_buffers_reads*/   2,
+    /*c_offload_host_scatter_outputs*/       2,
+    /*c_offload_host_map_out_data_buffer*/   4,
+    /*c_offload_host_unmap_out_data_buffer*/ 4,
+    /*c_offload_host_wait_buffers_reads*/    2,
+    /*c_offload_host_destroy_buffers*/       2
+};
+
+// Number of leading spaces printed before each target phase name in the
+// timing table; indexed by the phase number given to
+// offload_target_phase_name().
+// NOTE(review): the per-entry labels assume the order of the target phase
+// enum, which is declared outside this file -- confirm.
+const static int target_timer_prefix_spaces[] = {
+/*c_offload_target_total_time*/          0,
+/*c_offload_target_descriptor_setup*/    2,
+/*c_offload_target_func_lookup*/         2,
+/*c_offload_target_func_time*/           2,
+/*c_offload_target_scatter_inputs*/      4,
+/*c_offload_target_add_buffer_refs*/     6,
+/*c_offload_target_compute*/             4,
+/*c_offload_target_gather_outputs*/      4,
+/*c_offload_target_release_buffer_refs*/ 6
+};
+
+// Singly linked list of per-offload records: offload_timer_init() appends at
+// the tail, offload_report_free_data() unlinks and frees a record.
+static OffloadHostTimerData* timer_data_head;
+static OffloadHostTimerData* timer_data_tail;
+// Held by offload_timer_init() while it appends to the list.
+static mutex_t               timer_data_mutex;
+
+// Helpers that write one padded phase name into the stream; defined below.
+static void offload_host_phase_name(std::stringstream &ss, int p_node);
+static void offload_target_phase_name(std::stringstream &ss, int p_node);
+
+// Prints the timing table for every record on the timer list to stdout.
+//
+// For each record the host phases are listed first (ticks divided by
+// cpu_frequency), followed by the target phases (ticks divided by the
+// frequency reported by the target; 0 is printed when that frequency is 0).
+// The whole table is built in memory and written with a single call.
+extern void Offload_Timer_Print(void)
+{
+    std::string       buf;
+    std::stringstream ss;
+    const char *stars =
+        "**************************************************************";
+
+    ss << "\n\n" << stars << "\n";
+    ss << "                             ";
+    ss << report_get_message_str(c_report_title) << "\n";
+    ss << stars << "\n";
+    double frequency = cpu_frequency;
+
+    for (OffloadHostTimerData *pnode = timer_data_head;
+         pnode != 0; pnode = pnode->next) {
+        ss << "      ";
+        ss << report_get_message_str(c_report_from_file) << " "<< pnode->file;
+        ss << report_get_message_str(c_report_line) << " " << pnode->line;
+        ss << "\n";
+        for (int i = 0; i < c_offload_host_max_phase ; i++) {
+            ss << "          ";
+            offload_host_phase_name(ss, i);
+            ss << "   " << std::fixed << std::setprecision(5);
+            ss << (double)pnode->phases[i].total / frequency << "\n";
+        }
+
+        for (int i = 0; i < c_offload_target_max_phase ; i++) {
+            double time = 0;
+            // Guard against a target that never reported its frequency.
+            if (pnode->target.frequency != 0) {
+                time = (double) pnode->target.phases[i].total /
+                       (double) pnode->target.frequency;
+            }
+            ss << "          ";
+            offload_target_phase_name(ss, i);
+            ss << "   " << std::fixed << std::setprecision(5);
+            ss << time << "\n";
+        }
+    }
+
+    buf = ss.str();
+    // Write the text verbatim: it contains caller-supplied file names and
+    // must never be interpreted as a printf format string.
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+}
+
+// Prints the three header lines of an offload report (file, line and tag)
+// for the given record to stdout. Does nothing when pnode is NULL.
+extern void Offload_Report_Prolog(OffloadHostTimerData *pnode)
+{
+    std::string       buf;
+    std::stringstream ss;
+
+    if (pnode) {
+        // [Offload] [Mic 0] [File]          file.c
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic) << " ";
+        ss << pnode->card_number << "] [";
+        ss << report_get_message_str(c_report_file);
+        ss << "]                    " << pnode->file << "\n";
+
+        // [Offload] [Mic 0] [Line]          1234
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic) << " ";
+        ss << pnode->card_number << "] [";
+        ss << report_get_message_str(c_report_line);
+        ss << "]                    " << pnode->line << "\n";
+
+        // [Offload] [Mic 0] [Tag]          Tag 1
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic) << " ";
+        ss << pnode->card_number << "] [";
+        ss << report_get_message_str(c_report_tag);
+        ss << "]                     " << report_get_message_str(c_report_tag);
+        ss << " " << pnode->offload_number << "\n";
+
+        buf = ss.str();
+        // Write the text verbatim: it contains the caller-supplied file name
+        // and must never be interpreted as a printf format string.
+        fputs(buf.c_str(), stdout);
+        fflush(stdout);
+    }
+}
+
+// Prints the closing lines of an offload report for the given record and
+// then releases the record through offload_report_free_data().
+//
+// Always printed: the host time of phase 0 and the target time of phase 0,
+// both in seconds. At report level OFFLOAD_REPORT_2 and above the sent and
+// received byte counts are printed as well. Does nothing when timer_data is
+// NULL.
+extern void Offload_Report_Epilog(OffloadHostTimerData * timer_data)
+{
+    double frequency = cpu_frequency;
+    std::string       buf;
+    std::stringstream ss;
+
+    OffloadHostTimerData *pnode = timer_data;
+
+    if (!pnode) {
+        return;
+    }
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_host) << "]  [";
+    ss << report_get_message_str(c_report_tag) <<  " ";
+    ss << pnode->offload_number << "] [";
+    ss << report_get_message_str(c_report_cpu_time) << "]        ";
+    ss << std::fixed << std::setprecision(6);
+    ss << (double) pnode->phases[0].total / frequency;
+    ss << report_get_message_str(c_report_seconds) << "\n";
+
+    if (offload_report_level >= OFFLOAD_REPORT_2) {
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic);
+        ss << " " << pnode->card_number;
+        ss << "] [" << report_get_message_str(c_report_tag) << " ";
+        ss <<  pnode->offload_number << "] [";
+        ss << report_get_message_str(c_report_cpu_to_mic_data) << "]   ";
+        ss << pnode->sent_bytes << " ";
+        ss << report_get_message_str(c_report_bytes) << "\n";
+    }
+
+    double time = 0;
+    // Guard against a target that never reported its frequency.
+    if (pnode->target.frequency != 0) {
+        time = (double) pnode->target.phases[0].total /
+            (double) pnode->target.frequency;
+    }
+    ss << "[" << report_get_message_str(c_report_offload) << "] [";
+    ss << report_get_message_str(c_report_mic) << " ";
+    ss << pnode->card_number<< "] [";
+    ss << report_get_message_str(c_report_tag) <<  " ";
+    ss << pnode->offload_number << "] [";
+    ss << report_get_message_str(c_report_mic_time) << "]        ";
+    ss << std::fixed << std::setprecision(6) << time;
+    ss << report_get_message_str(c_report_seconds) << "\n";
+
+    if (offload_report_level >= OFFLOAD_REPORT_2) {
+        ss << "[" << report_get_message_str(c_report_offload) << "] [";
+        ss << report_get_message_str(c_report_mic);
+        ss << " " << pnode->card_number;
+        ss << "] [" << report_get_message_str(c_report_tag) << " ";
+        ss <<  pnode->offload_number << "] [";
+        ss << report_get_message_str(c_report_mic_to_cpu_data) << "]   ";
+        ss << pnode->received_bytes << " ";
+        ss << report_get_message_str(c_report_bytes) << "\n";
+    }
+    ss << "\n";
+
+    buf = ss.str();
+    // Write the text verbatim rather than using it as a printf format
+    // string: the message catalogue text is data, not a format.
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+
+    // The record is no longer needed once its report has been printed.
+    offload_report_free_data(timer_data);
+}
+
+// Unlinks the given record from the timer list and frees it.
+//
+// The list is searched for timer_data; when it is not on the list (or is
+// NULL) nothing happens. The list is modified under timer_data_mutex, the
+// same lock offload_timer_init() holds while appending.
+extern void offload_report_free_data(OffloadHostTimerData * timer_data)
+{
+    OffloadHostTimerData *pnode_last = NULL;
+
+    timer_data_mutex.lock();
+    for (OffloadHostTimerData *pnode = timer_data_head;
+         pnode != 0; pnode = pnode->next) {
+        if (timer_data == pnode) {
+            if (pnode_last) {
+                pnode_last->next = pnode->next;
+            }
+            else {
+                timer_data_head = pnode->next;
+            }
+            // Keep the tail valid: without this, removing the last record
+            // leaves timer_data_tail pointing at freed memory and the next
+            // append in offload_timer_init() writes through it.
+            if (timer_data_tail == pnode) {
+                timer_data_tail = pnode_last;
+            }
+            OFFLOAD_FREE(pnode);
+            break;
+        }
+        pnode_last = pnode;
+    }
+    timer_data_mutex.unlock();
+}
+
+// Appends `num` single spaces to the stream; a zero or negative count
+// appends nothing.
+static void fill_buf_with_spaces(std::stringstream &ss, int num)
+{
+    int remaining = num;
+
+    while (remaining > 0) {
+        ss << " ";
+        --remaining;
+    }
+}
+
+// Writes the name of host phase `p_node` into the stream: first the phase's
+// indentation from host_timer_prefix_spaces, then the name, then enough
+// spaces to pad the entry to 40 columns (always at least one space).
+static void offload_host_phase_name(std::stringstream &ss, int p_node)
+{
+    const int column_width = 40;
+
+    char const *name = report_get_host_stage_str(p_node);
+    const int indent = host_timer_prefix_spaces[p_node];
+
+    fill_buf_with_spaces(ss, indent);
+
+    const int name_length = strlen(name);
+    ss << name;
+
+    int padding = column_width - indent - name_length;
+    if (padding <= 0) {
+        // Over-long names still get one separating space.
+        padding = 1;
+    }
+    fill_buf_with_spaces(ss, padding);
+}
+
+// Writes the name of target phase `p_node` into the stream: first the
+// phase's indentation from target_timer_prefix_spaces, then the name, then
+// enough spaces to pad the entry to 40 columns (always at least one space).
+static void offload_target_phase_name(std::stringstream &ss, int p_node)
+{
+    const int column_width = 40;
+
+    char const *name = report_get_target_stage_str(p_node);
+    const int indent = target_timer_prefix_spaces[p_node];
+
+    fill_buf_with_spaces(ss, indent);
+
+    const int name_length = strlen(name);
+    ss << name;
+
+    int padding = column_width - indent - name_length;
+    if (padding <= 0) {
+        // Over-long names still get one separating space.
+        padding = 1;
+    }
+    fill_buf_with_spaces(ss, padding);
+}
+
+// Records the current timestamp-counter value as the start of host phase
+// p_type. A NULL record is ignored, matching the offload_timer_fill_host_*
+// functions: offload_timer_init() returns NULL when timing and reporting
+// were inactive at the time the record was requested.
+void offload_timer_start(OffloadHostTimerData * timer_data,
+                         OffloadHostPhase p_type)
+{
+    if (timer_data) {
+        timer_data->phases[p_type].start = _rdtsc();
+    }
+}
+
+// Adds the ticks elapsed since the matching offload_timer_start() to the
+// total of host phase p_type. A NULL record is ignored, matching the
+// offload_timer_fill_host_* functions: offload_timer_init() returns NULL
+// when timing and reporting were inactive at the time the record was
+// requested.
+void offload_timer_stop(OffloadHostTimerData * timer_data,
+                        OffloadHostPhase p_type)
+{
+    if (timer_data) {
+        timer_data->phases[p_type].total += _rdtsc() -
+                                            timer_data->phases[p_type].start;
+    }
+}
+
+// Copies the target's timing block into the host record.
+//
+// buf is read as an array of uint64_t: element 0 is the target frequency,
+// followed by one accumulated total per target phase. A NULL record or a
+// NULL buffer is ignored, matching the offload_timer_fill_host_* functions.
+void offload_timer_fill_target_data(OffloadHostTimerData * timer_data,
+                                    void *buf)
+{
+    if (timer_data == 0 || buf == 0) {
+        return;
+    }
+
+    uint64_t *data = (uint64_t*) buf;
+
+    timer_data->target.frequency = *data++;
+    for (int i = 0; i < c_offload_target_max_phase; i++) {
+        timer_data->target.phases[i].total = *data++;
+    }
+}
+
+// Adds sent_bytes to the record's running sent-bytes counter; a NULL record
+// is ignored.
+void offload_timer_fill_host_sdata(OffloadHostTimerData * timer_data,
+                                   uint64_t sent_bytes)
+{
+    if (!timer_data) {
+        return;
+    }
+    timer_data->sent_bytes = timer_data->sent_bytes + sent_bytes;
+}
+
+// Adds received_bytes to the record's running received-bytes counter; a NULL
+// record is ignored.
+void offload_timer_fill_host_rdata(OffloadHostTimerData * timer_data,
+                                   uint64_t received_bytes)
+{
+    if (!timer_data) {
+        return;
+    }
+    timer_data->received_bytes = timer_data->received_bytes + received_bytes;
+}
+
+// Stores card_number in the record, replacing any previous value; a NULL
+// record is ignored.
+void offload_timer_fill_host_mic_num(OffloadHostTimerData * timer_data,
+                                     int card_number)
+{
+    if (!timer_data) {
+        return;
+    }
+    timer_data->card_number = card_number;
+}
+
+// Creates the timing record for one offload and appends it to the timer
+// list.
+//
+// file, line: source location of the offload; the file pointer is stored as
+//             given, not copied.
+// Returns the new zero-initialised record, or NULL when neither timing nor
+// reporting is active. The whole operation runs under timer_data_mutex.
+OffloadHostTimerData* offload_timer_init(const char *file, int line)
+{
+    OffloadHostTimerData* timer_data = NULL;
+
+    timer_data_mutex.lock();
+    {
+        if (timer_enabled ||
+            (offload_report_level && offload_report_enabled)) {
+            timer_data = (OffloadHostTimerData*)
+                OFFLOAD_MALLOC(sizeof(OffloadHostTimerData), 0);
+            memset(timer_data, 0, sizeof(OffloadHostTimerData));
+
+            timer_data->offload_number = OFFLOAD_DEBUG_INCR_OFLD_NUM() - 1;
+
+            // Append at the tail so records stay in creation order.
+            if (timer_data_head == 0) {
+                timer_data_head = timer_data;
+                timer_data_tail = timer_data;
+            }
+            else {
+                timer_data_tail->next = timer_data;
+                timer_data_tail = timer_data;
+            }
+
+            timer_data->file = file;
+            timer_data->line = line;
+        }
+    }
+    timer_data_mutex.unlock();
+    return timer_data;
+}
+
+#endif // TIMING_SUPPORT

Added: openmp/trunk/offload/src/offload_timer_target.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_timer_target.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_timer_target.cpp (added)
+++ openmp/trunk/offload/src/offload_timer_target.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_timer.h"
+#include "offload_target.h"
+
+#ifdef __INTEL_COMPILER
+#include <ia32intrin.h>
+#else // __INTEL_COMPILER
+#include <x86intrin.h>
+#endif // __INTEL_COMPILER
+
+
+
+int timer_enabled = 0;
+
+#ifdef TIMING_SUPPORT
+
+// Timer state of the calling thread. The variable is thread-local; the two
+// branches differ only in the storage-class spelling used to say so.
+#if defined(LINUX) || defined(FREEBSD)
+static __thread OffloadTargetTimerData timer_data;
+#else // WINNT
+static __declspec(thread) OffloadTargetTimerData timer_data;
+#endif // defined(LINUX) || defined(FREEBSD)
+
+
+// Records the current timestamp-counter value as the start of target phase
+// p_type in the calling thread's timer state.
+void offload_timer_start(OffloadTargetPhase p_type)
+{
+    const uint64_t now = _rdtsc();
+
+    timer_data.phases[p_type].start = now;
+}
+
+// Adds the ticks elapsed since the matching offload_timer_start() to the
+// total of target phase p_type in the calling thread's timer state.
+void offload_timer_stop(OffloadTargetPhase p_type)
+{
+    const uint64_t elapsed = _rdtsc() - timer_data.phases[p_type].start;
+
+    timer_data.phases[p_type].total += elapsed;
+}
+
+// Resets the calling thread's timer state (frequency and every phase slot)
+// to zero.
+void offload_timer_init()
+{
+    memset(&timer_data, 0, sizeof(timer_data));
+}
+
+// Serialises the calling thread's timer state into buf as consecutive
+// uint64_t values: mic_frequency first, then the accumulated total of each
+// target phase in index order. Values are written with memcpy.
+void offload_timer_fill_target_data(void *buf)
+{
+    uint64_t *out = (uint64_t*) buf;
+
+    // The frequency is cached in the timer state before being sent.
+    timer_data.frequency = mic_frequency;
+    memcpy(out, &(timer_data.frequency), sizeof(uint64_t));
+    ++out;
+
+    for (int phase = 0; phase < c_offload_target_max_phase; ++phase, ++out) {
+        memcpy(out, &(timer_data.phases[phase].total), sizeof(uint64_t));
+    }
+}
+
+#endif // TIMING_SUPPORT

Added: openmp/trunk/offload/src/offload_trace.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_trace.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_trace.cpp (added)
+++ openmp/trunk/offload/src/offload_trace.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,309 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_trace.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sstream>
+#include "liboffload_error_codes.h"
+
+extern const char *prefix;
+
+#if !HOST_LIBRARY
+extern int mic_index;
+#endif
+
+// The debug routines
+
+// Writes the common prefix of a trace line into the stream:
+//   [offload] [prefix ...] [tag-label N] [tag]           text
+// The host build prints only the prefix and, when print_tag is false, pads
+// the tag column with spaces; the target build prints the prefix together
+// with mic_index and simply omits the tag column. Always returns 0.
+static const char * offload_stage(std::stringstream &ss,
+                                  int offload_number,
+                                  const char *tag,
+                                  const char *text,
+                                  bool print_tag)
+{
+    ss << "[" << report_get_message_str(c_report_offload) << "]";
+
+#if HOST_LIBRARY
+    ss << " [" << prefix << "]";
+    if (!print_tag) {
+        ss << "         ";
+    }
+    else {
+        ss << "  [" << report_get_message_str(c_report_tag);
+        ss << " " << offload_number << "]";
+    }
+#else
+    ss << " [" << prefix << " " << mic_index << "]";
+    if (print_tag) {
+        ss << " [" << report_get_message_str(c_report_tag);
+        ss << " " << offload_number << "]";
+    }
+#endif
+
+    // The tag and text columns are the same in both builds.
+    ss << " [" << tag << "]";
+    ss << "           " << text;
+
+    return 0;
+}
+
+// Writes the prefix of a signal/wait trace line into the stream:
+//   [offload] [prefix]  [tag-label N] [tag]          text
+// The tag column is always printed. Always returns 0.
+static const char * offload_signal(std::stringstream &ss,
+                                  int offload_number,
+                                  const char *tag,
+                                  const char *text)
+{
+    ss << "[" << report_get_message_str(c_report_offload) << "]"
+       << " [" << prefix << "]";
+
+    ss << "  [" << report_get_message_str(c_report_tag)
+       << " " << offload_number << "]";
+
+    ss << " [" << tag << "]"
+       << "          " << text;
+
+    return 0;
+}
+
+// Formats one trace line for the given stage and writes it to stdout.
+//
+// stage:          an OffloadTraceStage value selecting the message and the
+//                 variadic arguments that are expected.
+// offload_number: number printed in the tag column.
+// ...:            the first variadic argument is always read as a char* and
+//                 discarded; the remaining arguments depend on the stage and
+//                 are consumed in the order shown in each case below.
+//
+// An unknown stage reports c_report_unknown_trace_node and aborts.
+void offload_stage_print(int stage, int offload_number, ...)
+{
+    std::string buf;
+    std::stringstream ss;
+    char const *str1;
+    char const *str2;
+    va_list va_args;
+    va_start(va_args, offload_number);
+    // Skip the leading char* argument; it is not used for the output.
+    va_arg(va_args, char*);
+
+    switch (stage) {
+        case c_offload_start:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_start);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_init:
+            // args: int logical card, int physical card
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_init);
+            offload_stage(ss, offload_number, str1, str2, false);
+            ss << " " << report_get_message_str(c_report_logical_card);
+            ss << " " << va_arg(va_args, int);
+            ss << " = " << report_get_message_str(c_report_physical_card);
+            ss << " " << va_arg(va_args, int);
+            break;
+        case c_offload_register:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_register);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_init_func:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_init_func);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": " << va_arg(va_args, char*);
+            break;
+        case c_offload_create_buf_host:
+            // args: uint64_t base, uint64_t length
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_create_buf_host);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": base=0x" << std::hex << va_arg(va_args, uint64_t);
+            ss << " length=" << std::dec << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_create_buf_mic:
+            // args: uint64_t size, int offset, int 2M-page flag
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_create_buf_mic);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": size=" << va_arg(va_args, uint64_t);
+            ss << " offset=" << va_arg(va_args, int);
+            if (va_arg(va_args,int))
+               ss << " (2M page)";
+            break;
+        case c_offload_send_pointer_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_send_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_sent_pointer_data:
+            // args: uint64_t amount
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_sent_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_gather_copyin_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_gather_copyin_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_copyin_data:
+            // args: uint64_t amount
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_copyin_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t) << " ";
+            break;
+        case c_offload_compute:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_compute);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_receive_pointer_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_receive_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_received_pointer_data:
+            // args: uint64_t amount
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_received_pointer_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << " " << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_start_target_func:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_start_target_func);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << ": " << va_arg(va_args, char*);
+            break;
+        case c_offload_var:
+            // args: int (ignored), char*, char*
+            str1 = report_get_message_str(c_report_var);
+            offload_stage(ss, offload_number, str1, "  ", true);
+            va_arg(va_args, int);
+            ss << va_arg(va_args, char*);
+            ss << " " << " " << va_arg(va_args, char*);
+            break;
+        case c_offload_scatter_copyin_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_scatter_copyin_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_gather_copyout_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_gather_copyout_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_scatter_copyout_data:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_scatter_copyout_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_copyout_data:
+            // args: uint64_t amount
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_copyout_data);
+            offload_stage(ss, offload_number, str1, str2, true);
+            ss << "   " << va_arg(va_args, uint64_t);
+            break;
+        case c_offload_signal:
+            {
+                // args: uint64_t* signal (may be NULL)
+                uint64_t  *signal;
+                str1 = report_get_message_str(c_report_state_signal);
+                str2 = report_get_message_str(c_report_signal);
+                offload_signal(ss, offload_number, str1, str2);
+                signal = va_arg(va_args, uint64_t*);
+                if (signal)
+                   ss << " 0x" << std::hex << *signal;
+                else
+                   ss << " none";
+            }
+            break;
+        case c_offload_wait:
+            {
+                // args: int count, uint64_t** signals; the entries are
+                // printed last to first.
+                int count;
+                uint64_t  **signal;
+                str1 = report_get_message_str(c_report_state_signal);
+                str2 = report_get_message_str(c_report_wait);
+                offload_signal(ss, offload_number, str1, str2);
+                count = va_arg(va_args, int);
+                signal = va_arg(va_args, uint64_t**);
+                if (count) {
+                    while (count) {
+                        ss << " " << std::hex << signal[count-1];
+                        count--;
+                    }
+                }
+                else
+                    ss << " none";
+            }
+            break;
+        case c_offload_unregister:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_unregister);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_destroy:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_destroy);
+            offload_stage(ss, offload_number, str1, str2, true);
+            break;
+        case c_offload_myoinit:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myoinit);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myoregister:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myoregister);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myofini:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myofini);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_mic_myo_shared:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_mic_myo_shared);
+            offload_stage(ss, offload_number, str1, str2, false);
+            ss << " " << va_arg(va_args, char*);
+            break;
+        case c_offload_mic_myo_fptr:
+            // args: char* name
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_mic_myo_fptr);
+            offload_stage(ss, offload_number, str1, str2, false);
+            ss << " " << va_arg(va_args, char*);
+            break;
+        case c_offload_myosharedmalloc:
+            // args: char* (ignored), size_t
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedmalloc);
+            offload_stage(ss, offload_number, str1, str2, false);
+            va_arg(va_args, char*);
+            ss << " " << va_arg(va_args, size_t);
+            break;
+        case c_offload_myosharedfree:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedfree);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myosharedalignedmalloc:
+            // args: char* (ignored), size_t, size_t
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedalignedmalloc);
+            offload_stage(ss, offload_number, str1, str2, false);
+            va_arg(va_args, char*);
+            ss << " " << va_arg(va_args, size_t);
+            ss << " " << va_arg(va_args, size_t);
+            break;
+        case c_offload_myosharedalignedfree:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myosharedalignedfree);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myoacquire:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myoacquire);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        case c_offload_myorelease:
+            str1 = report_get_message_str(c_report_state);
+            str2 = report_get_message_str(c_report_myorelease);
+            offload_stage(ss, offload_number, str1, str2, false);
+            break;
+        default:
+            // NOTE(review): c_offload_finish has no case above and therefore
+            // ends up here -- confirm that no caller passes it.
+            LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
+            abort();
+    }
+    ss << "\n";
+    buf = ss.str();
+    // Write the text verbatim: it contains caller-supplied names and must
+    // never be interpreted as a printf format string.
+    fputs(buf.c_str(), stdout);
+    fflush(stdout);
+
+    va_end(va_args);
+    return;
+}

Added: openmp/trunk/offload/src/offload_trace.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_trace.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_trace.h (added)
+++ openmp/trunk/offload/src/offload_trace.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,52 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The parts of the offload library common to host and target
+
+void offload_stage_print(int stage, int offload_number, ...);
+
+enum OffloadTraceStage {
+    // Total time spent on the target
+    c_offload_start = 0,
+    c_offload_init,
+    c_offload_register,
+    c_offload_init_func,
+    c_offload_create_buf_host,
+    c_offload_create_buf_mic,
+    c_offload_send_pointer_data,
+    c_offload_sent_pointer_data,
+    c_offload_gather_copyin_data,
+    c_offload_copyin_data,
+    c_offload_compute,
+    c_offload_receive_pointer_data,
+    c_offload_received_pointer_data,
+    c_offload_start_target_func,
+    c_offload_var,
+    c_offload_scatter_copyin_data,
+    c_offload_gather_copyout_data,
+    c_offload_scatter_copyout_data,
+    c_offload_copyout_data,
+    c_offload_signal,
+    c_offload_wait,
+    c_offload_unregister,
+    c_offload_destroy,
+    c_offload_finish,
+    c_offload_myoinit,
+    c_offload_myoregister,
+    c_offload_mic_myo_shared,
+    c_offload_mic_myo_fptr,
+    c_offload_myosharedmalloc,
+    c_offload_myosharedfree,
+    c_offload_myosharedalignedmalloc,
+    c_offload_myosharedalignedfree,
+    c_offload_myoacquire,
+    c_offload_myorelease,
+    c_offload_myofini
+};

Added: openmp/trunk/offload/src/offload_util.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_util.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_util.cpp (added)
+++ openmp/trunk/offload/src/offload_util.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,206 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_util.h"
+#include <errno.h>
+#include "liboffload_error_codes.h"
+
+#ifdef TARGET_WINNT
+// TLS read; key 0 means "no key" and yields NULL without calling the OS.
+void *thread_getspecific(pthread_key_t key)
+{
+    if (key != 0) {
+        return TlsGetValue(key);
+    }
+    return NULL;
+}
+
+// TLS write; returns 0 on success, otherwise the GetLastError() code.
+int thread_setspecific(pthread_key_t key, const void *value)
+{
+    return TlsSetValue(key, (LPVOID)value) ? 0 : GetLastError();
+}
+#endif // TARGET_WINNT
+
+// Parses a size given as a decimal number with an optional one-letter unit:
+// B (bytes), K (Kilobytes), M (Megabytes), G (Gigabytes) or T (Terabytes),
+// in either case. A number without a unit is taken as Kilobytes.
+// Returns true and stores the size in bytes in new_size. Returns false and
+// leaves new_size untouched if the string is malformed, negative, or the
+// byte count does not fit into 64 bits.
+bool __offload_parse_size_string(const char *str, uint64_t &new_size)
+{
+    uint64_t val;
+    char *suffix;
+    int shift = 10; // no unit given: default is Kilobytes
+
+    // the conversion below silently negates "-N", so look past the white
+    // space it would skip and reject a minus sign up front
+    const char *first = str;
+    while (*first == ' ' || (*first >= '\t' && *first <= '\r')) {
+        first++;
+    }
+    if (*first == '-') {
+        return false;
+    }
+
+    errno = 0;
+#ifdef TARGET_WINNT
+    // unsigned long is only 32 bits wide on Windows
+    val = _strtoui64(str, &suffix, 10);
+#else // TARGET_WINNT
+    val = strtoull(str, &suffix, 10);
+#endif // TARGET_WINNT
+    if (errno != 0 || suffix == str) {
+        return false;
+    }
+
+    if (suffix[0] != '\0') {
+        // exactly one unit character may follow the number
+        if (suffix[1] != '\0') {
+            return false;
+        }
+        switch (suffix[0]) {
+            case 'b': case 'B': shift = 0;  break;
+            case 'k': case 'K': shift = 10; break;
+            case 'm': case 'M': shift = 20; break;
+            case 'g': case 'G': shift = 30; break;
+            case 't': case 'T': shift = 40; break;
+            default:
+                return false;
+        }
+    }
+
+    // the unit scales val by 2^shift; refuse results that would wrap around
+    // instead of silently truncating them
+    if (shift != 0 && (val >> (64 - shift)) != 0) {
+        return false;
+    }
+    new_size = val << shift;
+    return true;
+}
+
+// Parses all of str as an integer; true (and value set) only on a full,
+// in-range parse. Uses _strtoi64 on Windows, where long is 32 bits wide.
+bool __offload_parse_int_string(const char *str, int64_t &value)
+{
+    char *suffix;
+    errno = 0;
+#ifdef TARGET_WINNT
+    const int64_t val = _strtoi64(str, &suffix, 0);
+#else
+    const int64_t val = strtoll(str, &suffix, 0);
+#endif
+    if (errno == 0 && suffix != str && *suffix == '\0') {
+        value = val;
+        return true;
+    }
+    return false;
+}
+
+#ifdef TARGET_WINNT
+// Loads the dynamic library at path and returns its module handle as a
+// void* (whatever LoadLibrary returned, so NULL when the load fails).
+extern void* DL_open(const char *path)
+{
+    /*
+     * do not display a message box with an error if the call below fails
+     * to load the dynamic library; the previous mode is kept for restoring.
+     */
+    int saved_mode =
+        SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
+
+    /* load dynamic library */
+    void *library = (void*) LoadLibrary(path);
+
+    /* restore error mode */
+    SetErrorMode(saved_mode);
+
+    return library;
+}
+
+// dladdr() stand-in: describes the module containing addr in dl_info.
+// Returns 1 on success and 0 if either Win32 query below fails.
+extern int DL_addr(const void *addr, Dl_info *dl_info)
+{
+    MEMORY_BASIC_INFORMATION region;
+    char path[MAX_PATH];
+    /* Fill MEMORY_BASIC_INFORMATION struct */
+    if (VirtualQuery(addr, &region, sizeof(region)) == 0) {
+        return 0;
+    }
+    /* ANSI file name for module */
+    HMODULE owner = (HMODULE)region.AllocationBase;
+    if (GetModuleFileNameA(owner, (char*) path, sizeof(path)) == 0) {
+        return 0;
+    }
+    /* the module file name is reported as the symbol name too */
+    strcpy(dl_info->dli_fname, path);
+    strcpy(dl_info->dli_sname, path);
+    dl_info->dli_fbase = region.BaseAddress;
+    dl_info->dli_saddr = addr;
+    return 1;
+}
+
+// InitOnceExecuteOnce() callback: parameter carries the routine to call
+static BOOL CALLBACK __offload_run_once_wrapper(
+    PINIT_ONCE initOnce,
+    PVOID parameter,
+    PVOID *context
+)
+{
+    ((void(*)(void)) parameter)();
+    return true;
+}
+
+// Hands func to InitOnceExecuteOnce() under the control block ctrl
+void __offload_run_once(OffloadOnceControl *ctrl, void (*func)(void))
+{
+    InitOnceExecuteOnce(ctrl, __offload_run_once_wrapper, (void*) func, 0);
+}
+#endif // TARGET_WINNT
+
+// Looks up name in the library behind handle. A non-null version selects
+// that symbol version through dlvsym(); Windows has no such notion.
+/* ARGSUSED */ // version is not used on windows
+void* DL_sym(void *handle, const char *name, const char *version)
+{
+#ifdef TARGET_WINNT
+    return GetProcAddress((HMODULE) handle, name);
+#else // TARGET_WINNT
+    if (version != 0) {
+        return dlvsym(handle, name, version);
+    }
+    return dlsym(handle, name);
+#endif // TARGET_WINNT
+}
+
+// Reads the element stored at base + offset and widens it to int64_t.
+// size selects the element width in bytes: 1, 2 and 4 are read as char,
+// short and int; every other size is read as a full int64_t.
+int64_t get_el_value(
+                     char *base,
+                     int64_t offset,
+                     int64_t size)
+{
+    char *const addr = base + offset;
+
+    if (size == 1) {
+        return static_cast<int64_t>(*addr);
+    }
+    if (size == 2) {
+        return static_cast<int64_t>(*reinterpret_cast<short *>(addr));
+    }
+    if (size == 4) {
+        return static_cast<int64_t>(*reinterpret_cast<int *>(addr));
+    }
+    // 8 bytes, and the fallback for any unrecognized size
+    return *reinterpret_cast<int64_t *>(addr);
+}

Added: openmp/trunk/offload/src/offload_util.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/offload_util.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/offload_util.h (added)
+++ openmp/trunk/offload/src/offload_util.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,153 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_UTIL_H_INCLUDED
+#define OFFLOAD_UTIL_H_INCLUDED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef TARGET_WINNT
+#include <windows.h>
+#include <process.h>
+#else // TARGET_WINNT
+#include <dlfcn.h>
+#include <pthread.h>
+#endif // TARGET_WINNT
+
+#ifdef TARGET_WINNT
+typedef unsigned pthread_key_t;
+typedef int pid_t;
+
+#define __func__ __FUNCTION__
+#define strtok_r(s,d,p) strtok_s(s,d,p)
+#define strcasecmp(a,b) stricmp(a,b)
+
+#define thread_key_create(key, destructor) \
+    (((*key = TlsAlloc()) > 0) ? 0 : GetLastError())
+#define thread_key_delete(key) TlsFree(key)
+
+#ifndef S_ISREG
+#define S_ISREG(mode)  (((mode) & S_IFMT) == S_IFREG)
+#endif
+
+void*   thread_getspecific(pthread_key_t key);
+int     thread_setspecific(pthread_key_t key, const void *value);
+#else
+#define thread_key_create(key, destructor) \
+            pthread_key_create((key), (destructor))
+#define thread_key_delete(key)  pthread_key_delete(key)
+#define thread_getspecific(key) pthread_getspecific(key)
+#define thread_setspecific(key, value) pthread_setspecific(key, value)
+#endif // TARGET_WINNT
+
+// Thin mutex wrapper: CRITICAL_SECTION on Windows, pthread mutex elsewhere.
+struct mutex_t {
+    mutex_t() {
+#ifdef TARGET_WINNT
+        InitializeCriticalSection(&m_lock);
+#else // TARGET_WINNT
+        pthread_mutex_init(&m_lock, 0);
+#endif // TARGET_WINNT
+    }
+    // Destroys the native lock object created by the constructor.
+    ~mutex_t() {
+#ifdef TARGET_WINNT
+        DeleteCriticalSection(&m_lock);
+#else // TARGET_WINNT
+        pthread_mutex_destroy(&m_lock);
+#endif // TARGET_WINNT
+    }
+    // Acquires the lock; the pthread return code is not checked.
+    void lock() {
+#ifdef TARGET_WINNT
+        EnterCriticalSection(&m_lock);
+#else // TARGET_WINNT
+        pthread_mutex_lock(&m_lock);
+#endif // TARGET_WINNT
+    }
+    // Releases the lock; the pthread return code is not checked.
+    void unlock() {
+#ifdef TARGET_WINNT
+        LeaveCriticalSection(&m_lock);
+#else // TARGET_WINNT
+        pthread_mutex_unlock(&m_lock);
+#endif // TARGET_WINNT
+    }
+
+private:
+#ifdef TARGET_WINNT
+    CRITICAL_SECTION    m_lock;
+#else
+    pthread_mutex_t     m_lock;
+#endif
+};
+
+// Scoped lock: holds the given mutex from construction until destruction.
+struct mutex_locker_t {
+    explicit mutex_locker_t(mutex_t &mutex) : m_mutex(mutex) {
+        m_mutex.lock();
+    }
+    ~mutex_locker_t() { m_mutex.unlock(); }
+private:
+    // copying is disabled: a copy's destructor would unlock a second time
+    mutex_locker_t(const mutex_locker_t&);
+    mutex_locker_t& operator=(const mutex_locker_t&);
+    mutex_t &m_mutex;
+};
+
+// Dynamic loader interface
+#ifdef TARGET_WINNT
+struct Dl_info
+{
+    char        dli_fname[MAX_PATH];
+    void       *dli_fbase;
+    char        dli_sname[MAX_PATH];
+    const void *dli_saddr;
+};
+
+void*   DL_open(const char *path);
+#define DL_close(handle)        FreeLibrary((HMODULE) (handle))
+int     DL_addr(const void *addr, Dl_info *info);
+#else
+#define DL_open(path)           dlopen((path), RTLD_NOW)
+#define DL_close(handle)        dlclose(handle)
+#define DL_addr(addr, info)     dladdr((addr), (info))
+#endif // TARGET_WINNT
+
+extern void* DL_sym(void *handle, const char *name, const char *version);
+
+// One-time initialization API
+#ifdef TARGET_WINNT
+typedef INIT_ONCE                   OffloadOnceControl;
+#define OFFLOAD_ONCE_CONTROL_INIT   INIT_ONCE_STATIC_INIT
+
+extern void __offload_run_once(OffloadOnceControl *ctrl, void (*func)(void));
+#else
+typedef pthread_once_t              OffloadOnceControl;
+#define OFFLOAD_ONCE_CONTROL_INIT   PTHREAD_ONCE_INIT
+
+#define __offload_run_once(ctrl, func) pthread_once(ctrl, func)
+#endif // TARGET_WINNT
+
+// Parses size specification string.
+extern bool __offload_parse_size_string(const char *str, uint64_t &new_size);
+
+// Parses string with integer value
+extern bool __offload_parse_int_string(const char *str, int64_t &value);
+
+// get value by its base, offset and size
+int64_t get_el_value(
+    char   *base,
+    int64_t offset,
+    int64_t size
+);
+#endif // OFFLOAD_UTIL_H_INCLUDED

Added: openmp/trunk/offload/src/ofldbegin.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/ofldbegin.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/ofldbegin.cpp (added)
+++ openmp/trunk/offload/src/ofldbegin.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,164 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if HOST_LIBRARY
+#include "offload_host.h"
+#include "offload_myo_host.h"
+#else
+#include "compiler_if_target.h"
+#include "offload_target.h"
+#include "offload_myo_target.h"
+#endif
+
+#ifdef TARGET_WINNT
+#define ALLOCATE(name) __declspec(allocate(name))
+#define DLL_LOCAL
+#else // TARGET_WINNT
+#define ALLOCATE(name) __attribute__((section(name)))
+#define DLL_LOCAL  __attribute__((visibility("hidden")))
+#endif // TARGET_WINNT
+
+#if HOST_LIBRARY
+// the host program/shared library should always have __offload_target_image
+// symbol defined. This symbol specifies the beginning of the target program
+// image.
+extern "C" DLL_LOCAL const void* __offload_target_image;
+#else // HOST_LIBRARY
+// Define a weak main which would be used on target side in case the user's
+// source file containing main does not have offload code.
+#pragma weak main
+int main(void)
+{
+    OFFLOAD_TARGET_MAIN();
+    return 0;
+}
+
+#pragma weak MAIN__
+extern "C" int MAIN__(void)
+{
+    OFFLOAD_TARGET_MAIN();
+    return 0;
+}
+#endif // HOST_LIBRARY
+
+// offload section prolog
+ALLOCATE(OFFLOAD_ENTRY_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_entry_table_start = { 0 };
+
+// list element for the current module
+static FuncList::Node __offload_entry_node = {
+    { &__offload_entry_table_start + 1, -1 },
+    0, 0
+};
+
+// offload fp section prolog
+ALLOCATE(OFFLOAD_FUNC_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_func_table_start = { 0 };
+
+// list element for the current module
+static FuncList::Node __offload_func_node = {
+    { &__offload_func_table_start + 1, -1 },
+    0, 0
+};
+
+// offload var section prolog
+ALLOCATE(OFFLOAD_VAR_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(VarTable::Entry)))
+#endif // TARGET_WINNT
+static VarTable::Entry __offload_var_table_start = { 0 };
+
+// list element for the current module
+static VarList::Node __offload_var_node = {
+    { &__offload_var_table_start + 1 },
+    0, 0
+};
+
+#ifdef MYO_SUPPORT
+
+// offload myo shared var section prolog
+ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+#endif // TARGET_WINNT
+static SharedTableEntry __offload_myo_shared_table_start = { 0 };
+
+#if HOST_LIBRARY
+// offload myo shared var init section prolog
+ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(InitTableEntry)))
+#endif // TARGET_WINNT
+static InitTableEntry __offload_myo_shared_init_table_start = { 0 };
+#endif
+
+// offload myo fptr section prolog
+ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_START)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FptrTableEntry)))
+#endif // TARGET_WINNT
+static FptrTableEntry __offload_myo_fptr_table_start = { 0 };
+
+#endif // MYO_SUPPORT
+
+// init/fini code which adds/removes local lookup data to/from the global list
+
+static void offload_fini();
+
+#ifndef TARGET_WINNT
+static void offload_init() __attribute__((constructor(101)));
+#else // TARGET_WINNT
+static void offload_init();
+
+// Place offload initialization before user constructors
+ALLOCATE(OFFLOAD_CRTINIT_SECTION_START)
+static void (*addressof_offload_init)() = offload_init;
+#endif // TARGET_WINNT
+
+static void offload_init()
+{
+    // register offload tables
+    __offload_register_tables(&__offload_entry_node,
+                              &__offload_func_node,
+                              &__offload_var_node);
+
+#if HOST_LIBRARY
+    __offload_register_image(&__offload_target_image);
+    atexit(offload_fini);
+#endif // HOST_LIBRARY
+
+#ifdef MYO_SUPPORT
+    __offload_myoRegisterTables(
+#if HOST_LIBRARY
+        &__offload_myo_shared_init_table_start + 1,
+#endif // HOST_LIBRARY
+        &__offload_myo_shared_table_start + 1,
+        &__offload_myo_fptr_table_start + 1
+    );
+#endif // MYO_SUPPORT
+}
+
+static void offload_fini()
+{
+#if HOST_LIBRARY
+    __offload_unregister_image(&__offload_target_image);
+#endif // HOST_LIBRARY
+
+    // unregister offload tables
+    __offload_unregister_tables(&__offload_entry_node,
+                                &__offload_func_node,
+                                &__offload_var_node);
+}

Added: openmp/trunk/offload/src/ofldend.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/ofldend.cpp?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/ofldend.cpp (added)
+++ openmp/trunk/offload/src/ofldend.cpp Wed Apr  9 10:40:23 2014
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if HOST_LIBRARY
+#include "offload_host.h"
+#include "offload_myo_host.h"
+#else
+#include "offload_target.h"
+#include "offload_myo_target.h"
+#endif
+
+#ifdef TARGET_WINNT
+#define ALLOCATE(name) __declspec(allocate(name))
+#else // TARGET_WINNT
+#define ALLOCATE(name) __attribute__((section(name)))
+#endif // TARGET_WINNT
+
+// offload entry table
+ALLOCATE(OFFLOAD_ENTRY_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_entry_table_end = { (const char*)-1 };
+
+// offload function table
+ALLOCATE(OFFLOAD_FUNC_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FuncTable::Entry)))
+#endif // TARGET_WINNT
+static FuncTable::Entry __offload_func_table_end = { (const char*)-1 };
+
+// data table
+ALLOCATE(OFFLOAD_VAR_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(VarTable::Entry)))
+#endif // TARGET_WINNT
+static VarTable::Entry __offload_var_table_end = { (const char*)-1 };
+
+#ifdef MYO_SUPPORT
+
+// offload myo shared var section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(SharedTableEntry)))
+static SharedTableEntry __offload_myo_shared_table_end = { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static SharedTableEntry __offload_myo_shared_table_end = { 0 };
+#endif // TARGET_WINNT
+
+#if HOST_LIBRARY
+// offload myo shared var init section epilog
+ALLOCATE(OFFLOAD_MYO_SHARED_INIT_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(InitTableEntry)))
+static InitTableEntry __offload_myo_shared_init_table_end = { (const char*)-1, 0 };
+#else // TARGET_WINNT
+static InitTableEntry __offload_myo_shared_init_table_end = { 0 };
+#endif // TARGET_WINNT
+#endif // HOST_LIBRARY
+
+// offload myo fptr section epilog
+ALLOCATE(OFFLOAD_MYO_FPTR_TABLE_SECTION_END)
+#ifdef TARGET_WINNT
+__declspec(align(sizeof(FptrTableEntry)))
+static FptrTableEntry __offload_myo_fptr_table_end = { (const char*)-1, 0, 0 };
+#else // TARGET_WINNT
+static FptrTableEntry __offload_myo_fptr_table_end = { 0 };
+#endif // TARGET_WINNT
+
+#endif // MYO_SUPPORT

Added: openmp/trunk/offload/src/orsl-lite/include/orsl-lite.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/orsl-lite/include/orsl-lite.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/orsl-lite/include/orsl-lite.h (added)
+++ openmp/trunk/offload/src/orsl-lite/include/orsl-lite.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,221 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef _ORSL_LITE_H_
+#define _ORSL_LITE_H_
+
+#ifndef TARGET_WINNT
+#include <sched.h>
+#else
+#define cpu_set_t int
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Type of a ORSLBusySet */
+typedef enum ORSLBusySetType {
+    BUSY_SET_EMPTY = 0,     /**< Empty set */
+    BUSY_SET_PARTIAL = 1,   /**< Non-empty set that omits some threads */
+    BUSY_SET_FULL = 2       /**< A set that includes all threads on the card */
+} BusySetType;
+
+/** ORSLBusySet encapsulation */
+typedef struct ORSLBusySet {
+    BusySetType type;   /**< Set type */
+#ifdef __linux__
+    cpu_set_t cpu_set;  /**< CPU mask (unused for BUSY_SET_EMPTY and
+                           BUSY_SET_PARTIAL sets) represented by the standard
+                           Linux CPU set type -- cpu_set_t. Threads are numbered
+                           starting from 0. The maximal possible thread number
+                           is system-specific. See CPU_SET(3) family of macros
+                           for more details. Unused in ORSL Lite. */
+#endif
+} ORSLBusySet;
+
+/** Client tag */
+typedef char* ORSLTag;
+
+/** Maximal length of tag in characters */
+#define ORSL_MAX_TAG_LEN 128
+
+/** Maximal number of cards that can be managed by ORSL */
+#define ORSL_MAX_CARDS 32
+
+/** Reserves computational resources on a set of cards. Blocks.
+ *
+ * If any of the resources cannot be reserved, this function will block until
+ * they become available. Reservation can be recursive if performed by the
+ * same tag. A recursively reserved resource must be released the same number
+ * of times it was reserved.
+ *
+ * @see ORSLTryReserve
+ *
+ * @param[in]  n      Number of cards to reserve resources on. Cannot be < 0
+ *                    or > ORSL_MAX_CARDS.
+ *
+ * @param[in]  inds   Indices of the cards: an integer array with n elements.
+ *                    Cannot be NULL if n > 0. Valid card indices are from 0
+ *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in]  bsets  Requested resources on each of the card. Cannot be NULL
+ *                    if n > 0.
+ *
+ * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
+ *                    must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns    0      if the resources were successfully reserved
+ *
+ * @returns    EINVAL if any of the arguments is invalid
+ *
+ * @returns    EAGAIN limit of recursive reservations reached
+ *                    (not in ORSL Lite)
+ *
+ * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ *                    equal to BUSY_SET_PARTIAL
+ */
+int ORSLReserve(const int n, const int *__restrict inds,
+                const ORSLBusySet *__restrict bsets,
+                const ORSLTag __restrict tag);
+
+/** Reserves computational resources on a set of cards. Does not block.
+ *
+ * If any of the resources cannot be reserved, this function will return
+ * immediately. Reservation can be recursive if performed by the same tag.
+ * A recursively reserved resource must be released the same number of times
+ * it was reserved.
+ *
+ * @see ORSLReserve
+ *
+ * @param[in]  n      Number of cards to reserve resources on. Cannot be < 0
+ *                    or > ORSL_MAX_CARDS.
+ *
+ * @param[in]  inds     Indices of the cards: an integer array with n elements.
+ *                      Cannot be NULL if n > 0. Valid card indices are from 0
+ *                      to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in]    bsets  Requested resources on each of the cards. Cannot be
+ *                      NULL if n > 0.
+ *
+ * @param[in]    tag    ORSLTag of the calling client. Cannot be NULL. Length
+ *                      must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns      0      if the resources were successfully reserved
+ *
+ * @returns      EBUSY  if some of the requested resources are busy
+ *
+ * @returns      EINVAL if any of the arguments is invalid
+ *
+ * @returns      EAGAIN limit of recursive reservations reached
+ *                      (not in ORSL Lite)
+ *
+ * @returns      ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ *                      equal to BUSY_SET_PARTIAL
+ */
+int ORSLTryReserve(const int n, const int *__restrict inds,
+                   const ORSLBusySet *__restrict bsets,
+                   const ORSLTag __restrict tag);
+
+/** Granularity of partial reservation */
+typedef enum ORSLPartialGranularity {
+    GRAN_CARD = 0, /**< Card granularity */
+    GRAN_THREAD = 1 /**< Thread granularity */
+} ORSLPartialGranularity;
+
+/** Requests reservation of some of computational resources on a set of cards.
+ * Does not block. Updates user-provided bsets to indicate which resources
+ * were reserved.
+ *
+ * If any of the resources cannot be reserved, this function will update busy
+ * sets provided by the caller to reflect what resources were actually
+ * reserved. This function supports two granularity modes: 'card' and
+ * 'thread'.  When granularity is set to 'card', a failure to reserve a thread
+ * on the card will imply that reservation has failed for the whole card. When
+ * granularity is set to 'thread', reservation on a card will be considered
+ * successful as long as at least one thread on the card was successfully
+ * reserved. Reservation can be recursive if performed by the same tag. A
+ * recursively reserved resource must be released the same number of times it
+ * was reserved.
+ *
+ * @param[in]  gran   Reservation granularity
+ *
+ * @param[in]  n      Number of cards to reserve resources on. Cannot be < 0
+ *                    or > ORSL_MAX_CARDS.
+ *
+ * @param[in]  inds   Indices of the cards: an integer array with n elements.
+ *                    Cannot be NULL if n > 0. Valid card indices are from 0
+ *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in,out] bsets Requested resources on each of the cards; updated to
+ *                    show what was actually reserved. Cannot be NULL if n > 0.
+ *
+ * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
+ *                    must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns    0      if at least some of the resources were successfully
+ *                    reserved
+ *
+ * @returns    EBUSY  if all of the requested resources are busy
+ *
+ * @returns    EINVAL if any of the arguments is invalid
+ *
+ * @returns    EAGAIN limit of recursive reservations reached
+ *                    (not in ORSL Lite)
+ *
+ * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ *                    equal to BUSY_SET_PARTIAL
+ */
+int ORSLReservePartial(const ORSLPartialGranularity gran, const int n,
+                       const int *__restrict inds,
+                       ORSLBusySet *__restrict bsets,
+                       const ORSLTag __restrict tag);
+
+/** Releases previously reserved computational resources on a set of cards.
+ *
+ * This function will fail if any of the resources to be released were not
+ * reserved by the calling client.
+ *
+ * @see ORSLReserve
+ * @see ORSLTryReserve
+ * @see ORSLReservePartial
+ *
+ * @param[in]  n      Number of cards to release resources on. Cannot be < 0
+ *                    or > ORSL_MAX_CARDS.
+ *
+ * @param[in]  inds   Indices of the cards: an integer array with n elements.
+ *                    Cannot be NULL if n > 0. Valid card indices are from 0
+ *                    to ORSL_MAX_CARDS-1. Cannot contain duplicate elements.
+ *
+ * @param[in]  bsets  Requested resources on each of the card. Cannot be NULL
+ *                    if n > 0.
+ *
+ * @param[in]  tag    ORSLTag of the calling client. Cannot be NULL. Length
+ *                    must not exceed ORSL_MAX_TAG_LEN.
+ *
+ * @returns    0      if the resources were successfully released
+ *
+ * @returns    EINVAL if any of the arguments is invalid
+ *
+ * @returns    EPERM  the calling client did not reserve some of the
+ *                    resources it is trying to release.
+ *
+ * @returns    ENOSYS (in ORSL Lite) if type of any of the busy sets is
+ *                    equal to BUSY_SET_PARTIAL
+ */
+int ORSLRelease(const int n, const int *__restrict inds,
+                const ORSLBusySet *__restrict bsets,
+                const ORSLTag __restrict tag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

Added: openmp/trunk/offload/src/orsl-lite/lib/orsl-lite.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/orsl-lite/lib/orsl-lite.c?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/orsl-lite/lib/orsl-lite.c (added)
+++ openmp/trunk/offload/src/orsl-lite/lib/orsl-lite.c Wed Apr  9 10:40:23 2014
@@ -0,0 +1,337 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "orsl-lite/include/orsl-lite.h"
+
+/* Defined unconditionally, so the #if below always takes its #else branch
+ * and no .symver directives are emitted. */
+#define DISABLE_SYMBOL_VERSIONING
+
+#if defined(__linux__) && !defined(DISABLE_SYMBOL_VERSIONING)
+/* Emits a ".symver <src>,<tgt><verstr>" assembler directive, binding the
+ * "0"-suffixed implementation symbol to the public name at the given
+ * version string. */
+#define symver(src, tgt, verstr) __asm__(".symver " #src "," #tgt verstr)
+symver(ORSLReserve0, ORSLReserve, "@@ORSL_0.0");
+symver(ORSLTryReserve0, ORSLTryReserve, "@@ORSL_0.0");
+symver(ORSLReservePartial0, ORSLReservePartial, "@@ORSL_0.0");
+symver(ORSLRelease0, ORSLRelease, "@@ORSL_0.0");
+#else
+/* Without symbol versioning the "0"-suffixed names are plain macro aliases,
+ * so the functions defined below under those names are compiled directly
+ * under their public names. */
+#define ORSLReserve0 ORSLReserve
+#define ORSLTryReserve0 ORSLTryReserve
+#define ORSLReservePartial0 ORSLReservePartial
+#define ORSLRelease0 ORSLRelease
+#endif
+
+#ifdef __linux__
+#include <pthread.h>
+/* Mutex taken by state_lock() and dropped by state_unlock(). */
+static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;
+/* Condition variable that state_wait_for_release() blocks on and that
+ * state_signal_release() wakes. */
+static pthread_cond_t release_cond = PTHREAD_COND_INITIALIZER;
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#pragma intrinsic(_ReadWriteBarrier)
+/* Windows counterpart of the mutex above: a slim reader/writer lock that
+ * this file only ever acquires in exclusive mode. */
+static SRWLOCK global_mutex = SRWLOCK_INIT;
+/* Set to 1 by state_lazy_init_sync() after it has initialized release_cond. */
+static volatile int release_cond_initialized = 0;
+/* Windows counterpart of the condition variable above. */
+static CONDITION_VARIABLE release_cond;
+/* Windows only: initializes release_cond on first use and records that in
+ * release_cond_initialized.
+ *
+ * The flag is tested once without the lock and then again after acquiring
+ * global_mutex exclusively, so the initialization itself runs under the lock.
+ * NOTE(review): this looks like double-checked locking; confirm that the
+ * volatile flag plus _ReadWriteBarrier() give the intended ordering.
+ *
+ * No call to this function appears in this file.
+ */
+static void state_lazy_init_sync()
+{
+    if (!release_cond_initialized) {
+        AcquireSRWLockExclusive(&global_mutex);
+        /* Compiler barrier placed between taking the lock and re-reading
+         * the flag. */
+        _ReadWriteBarrier();
+        if (!release_cond_initialized) {
+            InitializeConditionVariable(&release_cond);
+            release_cond_initialized = 1;
+        }
+        ReleaseSRWLockExclusive(&global_mutex);
+    }
+}
+#endif
+
+/* Acquires global_mutex.
+ *
+ * Returns the pthread_mutex_lock() result on Linux; always 0 on Windows,
+ * where the SRW lock is taken in exclusive mode.
+ */
+static int state_lock()
+{
+#if defined(__linux__)
+    const int rc = pthread_mutex_lock(&global_mutex);
+    return rc;
+#elif defined(_WIN32)
+    AcquireSRWLockExclusive(&global_mutex);
+    return 0;
+#endif
+}
+
+/* Releases global_mutex.
+ *
+ * Returns the pthread_mutex_unlock() result on Linux; always 0 on Windows.
+ */
+static int state_unlock()
+{
+#if defined(__linux__)
+    const int rc = pthread_mutex_unlock(&global_mutex);
+    return rc;
+#elif defined(_WIN32)
+    ReleaseSRWLockExclusive(&global_mutex);
+    return 0;
+#endif
+}
+
+/* Blocks on release_cond, passing global_mutex as the associated lock.
+ *
+ * Returns the pthread_cond_wait() result on Linux. On Windows returns 1 when
+ * SleepConditionVariableSRW() reports failure (returns 0) and 0 otherwise.
+ */
+static int state_wait_for_release()
+{
+#if defined(__linux__)
+    const int rc = pthread_cond_wait(&release_cond, &global_mutex);
+    return rc;
+#elif defined(_WIN32)
+    if (SleepConditionVariableSRW(&release_cond, &global_mutex,
+                                  INFINITE, 0) == 0)
+        return 1;
+    return 0;
+#endif
+}
+
+/* Wakes every thread blocked in state_wait_for_release().
+ *
+ * All waiters share the single release_cond, yet each one is waiting for its
+ * own set of cards and re-checks that set after waking. Waking only one
+ * waiter could pick a thread whose cards are still busy (it would simply go
+ * back to sleep) while a thread that could now proceed is never woken, so
+ * all waiters are woken and left to re-evaluate their own condition.
+ *
+ * Returns the pthread_cond_broadcast() result on Linux; always 0 on Windows.
+ */
+static int state_signal_release()
+{
+#ifdef __linux__
+    return pthread_cond_broadcast(&release_cond);
+#endif
+
+#ifdef _WIN32
+    WakeAllConditionVariable(&release_cond);
+    return 0;
+#endif
+}
+
+/* Per-card reservation bookkeeping, indexed by card number.
+ *
+ * owner     tag of the reserving client; reserve_card() copies it in when
+ *           rsrv_cnt is 0. Sized for ORSL_MAX_TAG_LEN characters plus the
+ *           terminating NUL.
+ * rsrv_cnt  incremented by reserve_card() and decremented by release_card().
+ */
+static struct {
+    char owner[ORSL_MAX_TAG_LEN + 1];
+    unsigned long rsrv_cnt;
+} rsrv_data[ORSL_MAX_CARDS];
+
+/* Validates the arguments shared by the reserve and release entry points.
+ *
+ * n      number of cards; must be in [0, ORSL_MAX_CARDS]
+ * inds   card indices; must be non-NULL when n != 0, each index must be in
+ *        [0, ORSL_MAX_CARDS-1] and may appear only once
+ * bsets  busy sets; must be non-NULL when n != 0 (contents are not inspected)
+ * tag    client tag; must be non-NULL and at most ORSL_MAX_TAG_LEN long
+ *
+ * Returns 0 when every argument is valid, -1 otherwise.
+ */
+static int check_args(const int n, const int *__restrict inds,
+                      const ORSLBusySet *__restrict bsets,
+                      const ORSLTag __restrict tag)
+{
+    int i;
+    int card_specified[ORSL_MAX_CARDS];
+    if (tag == NULL) return -1;
+    if (strlen((char *)tag) > ORSL_MAX_TAG_LEN) return -1;
+    /* n == ORSL_MAX_CARDS is legal (one entry per card), matching the
+     * documented "cannot be < 0 or > ORSL_MAX_CARDS" contract. The duplicate
+     * check below keeps inds within card_specified for that case too. */
+    if (n < 0 || n > ORSL_MAX_CARDS) return -1;
+    if (n != 0 && (inds == NULL || bsets == NULL)) return -1;
+    memset(card_specified, 0, sizeof(card_specified));
+    for (i = 0; i < n; i++) {
+        int ind = inds[i];
+        if (ind < 0 || ind >= ORSL_MAX_CARDS) return -1;
+        /* Each card may be listed at most once. */
+        if (card_specified[ind]) return -1;
+        card_specified[ind] = 1;
+    }
+    return 0;
+}
+
+/* Scans the first n busy sets for one of type BUSY_SET_PARTIAL.
+ *
+ * Returns -1 as soon as such a set is found, 0 if there is none.
+ */
+static int check_bsets(const int n, const ORSLBusySet *bsets)
+{
+    const ORSLBusySet *cur = bsets;
+    int remaining;
+
+    for (remaining = n; remaining > 0; remaining--, cur++) {
+        if (cur->type == BUSY_SET_PARTIAL)
+            return -1;
+    }
+    return 0;
+}
+
+/* Tells whether the client identified by tag may reserve the given card.
+ *
+ * An empty busy set is always acceptable. Otherwise the card must be either
+ * unreserved or already owned by tag, and its reservation counter must still
+ * have room for one more increment.
+ *
+ * Returns 0 if the reservation is possible, -1 otherwise.
+ */
+static int can_reserve_card(int card, const ORSLBusySet *__restrict bset,
+                            const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer holds ORSL_MAX_TAG_LEN + 1 bytes. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return 0;
+
+    /* Refuse to overflow the reservation counter. */
+    if (rsrv_data[card].rsrv_cnt == ULONG_MAX)
+        return -1;
+
+    if (rsrv_data[card].rsrv_cnt == 0)
+        return 0;
+
+    /* Already reserved: only the current owner may reserve again. */
+    return strncmp((char *)tag,
+                   rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0 ? 0 : -1;
+}
+
+/* Records one reservation of the given card for the client identified by tag.
+ *
+ * Does nothing for an empty busy set. Otherwise the first reservation of a
+ * card stores tag as its owner, and every reservation increments the card's
+ * counter. The preconditions are the ones can_reserve_card() tests; here they
+ * are only asserted.
+ */
+static void reserve_card(int card, const ORSLBusySet *__restrict bset,
+                         const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer holds ORSL_MAX_TAG_LEN + 1 bytes. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return;
+
+    assert(rsrv_data[card].rsrv_cnt == 0 ||
+            strncmp((char *)tag,
+                rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0);
+    assert(rsrv_data[card].rsrv_cnt < ULONG_MAX);
+
+    if (rsrv_data[card].rsrv_cnt == 0)
+        strncpy(rsrv_data[card].owner, (char *)tag, ORSL_MAX_TAG_LEN);
+    /* strncpy() does not terminate a maximum-length tag, so the last byte
+     * of the buffer is always forced to NUL. */
+    rsrv_data[card].owner[ORSL_MAX_TAG_LEN] = '\0';
+    rsrv_data[card].rsrv_cnt++;
+}
+
+/* Tells whether the client identified by tag may release the given card.
+ *
+ * An empty busy set is always acceptable. Otherwise the card must currently
+ * be reserved and its recorded owner must match tag.
+ *
+ * Returns 0 if the release is allowed, 1 otherwise.
+ */
+static int can_release_card(int card, const ORSLBusySet *__restrict bset,
+                            const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer holds ORSL_MAX_TAG_LEN + 1 bytes. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return 0;
+
+    /* Nothing to release if the card is not reserved at all. */
+    if (rsrv_data[card].rsrv_cnt == 0)
+        return 1;
+
+    return strncmp((char *)tag,
+                   rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0 ? 0 : 1;
+}
+
+/* Drops one reservation of the given card held by the client identified by
+ * tag.
+ *
+ * Does nothing for an empty busy set. Otherwise decrements the card's
+ * reservation counter. The preconditions are the ones can_release_card()
+ * tests; here they are only asserted.
+ */
+static void release_card(int card, const ORSLBusySet *__restrict bset,
+                         const ORSLTag __restrict tag)
+{
+    assert(tag != NULL);
+    assert(bset != NULL);
+    /* A tag of exactly ORSL_MAX_TAG_LEN characters is valid: check_args()
+     * accepts it and the owner buffer holds ORSL_MAX_TAG_LEN + 1 bytes. */
+    assert(strlen((char *)tag) <= ORSL_MAX_TAG_LEN);
+    assert(bset->type != BUSY_SET_PARTIAL);
+
+    if (bset->type == BUSY_SET_EMPTY)
+        return;
+
+    assert(strncmp((char *)tag,
+                rsrv_data[card].owner, ORSL_MAX_TAG_LEN) == 0);
+    assert(rsrv_data[card].rsrv_cnt > 0);
+
+    rsrv_data[card].rsrv_cnt--;
+}
+
+/* Blocking reservation: waits until every requested card can be reserved by
+ * tag, then reserves all of them.
+ *
+ * Returns 0 on success (immediately when n == 0), EINVAL when check_args()
+ * rejects the arguments, ENOSYS when check_bsets() rejects a busy set.
+ */
+int ORSLReserve0(const int n, const int *__restrict inds,
+                const ORSLBusySet *__restrict bsets,
+                const ORSLTag __restrict tag)
+{
+    int idx;
+    int all_free;
+
+    if (n == 0)
+        return 0;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Re-scan the whole request after every wake-up; leave the loop only
+     * once a full pass finds no unavailable card. */
+    for (;;) {
+        all_free = 1;
+        for (idx = 0; idx < n && all_free; idx++) {
+            if (can_reserve_card(inds[idx], &bsets[idx], tag) != 0)
+                all_free = 0;
+        }
+        if (all_free)
+            break;
+        /* Some card is taken: sleep until a release is signalled. */
+        state_wait_for_release();
+    }
+
+    /* Everything requested is available; take it all. */
+    for (idx = 0; idx < n; idx++)
+        reserve_card(inds[idx], &bsets[idx], tag);
+
+    state_unlock();
+    return 0;
+}
+
+/* Non-blocking reservation: reserves all requested cards if every one of them
+ * can be reserved right now, otherwise reserves nothing.
+ *
+ * Returns 0 on success (immediately when n == 0), EINVAL when check_args()
+ * rejects the arguments, ENOSYS when check_bsets() rejects a busy set, and
+ * EBUSY when at least one card cannot be reserved.
+ */
+int ORSLTryReserve0(const int n, const int *__restrict inds,
+                   const ORSLBusySet *__restrict bsets,
+                   const ORSLTag __restrict tag)
+{
+    int idx;
+    int status = 0;
+
+    if (n == 0)
+        return 0;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Single availability pass; stop at the first card that is taken. */
+    for (idx = 0; idx < n; idx++) {
+        if (can_reserve_card(inds[idx], &bsets[idx], tag) != 0) {
+            status = EBUSY;
+            break;
+        }
+    }
+
+    /* Reserve only when the whole request can be satisfied. */
+    if (status == 0) {
+        for (idx = 0; idx < n; idx++)
+            reserve_card(inds[idx], &bsets[idx], tag);
+    }
+
+    state_unlock();
+    return status;
+}
+
+/* Partial reservation: reserves whichever of the requested cards are
+ * available right now.
+ *
+ * The busy set of every card that cannot be reserved is overwritten with
+ * BUSY_SET_EMPTY in the caller's bsets array, including when the call ends
+ * up returning EBUSY.
+ *
+ * Returns 0 when at least one entry was not marked unavailable (and
+ * immediately when n == 0), EINVAL when gran is anything other than
+ * GRAN_CARD or check_args() rejects the arguments, ENOSYS when check_bsets()
+ * rejects a busy set, and EBUSY when no requested card is available.
+ */
+int ORSLReservePartial0(const ORSLPartialGranularity gran, const int n,
+                       const int *__restrict inds, ORSLBusySet *__restrict bsets,
+                       const ORSLTag __restrict tag)
+{
+    int idx;
+    int avail = n;
+    int status = EBUSY;
+
+    if (n == 0)
+        return 0;
+    /* Only card granularity is accepted; GRAN_THREAD and any other value
+     * are rejected with the same error code. */
+    if (gran != GRAN_CARD)
+        return EINVAL;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Single availability pass: blank out the entries that cannot be
+     * reserved so the caller can see which ones were dropped. */
+    for (idx = 0; idx < n; idx++) {
+        if (can_reserve_card(inds[idx], &bsets[idx], tag) != 0) {
+            bsets[idx].type = BUSY_SET_EMPTY;
+            avail--;
+        }
+    }
+
+    /* Reserve what is left; blanked entries are skipped by reserve_card(). */
+    if (avail != 0) {
+        for (idx = 0; idx < n; idx++)
+            reserve_card(inds[idx], &bsets[idx], tag);
+        status = 0;
+    }
+
+    state_unlock();
+    return status;
+}
+
+/* Releases the requested cards on behalf of tag, all or nothing.
+ *
+ * Returns 0 on success (immediately when n == 0), EINVAL when check_args()
+ * rejects the arguments, ENOSYS when check_bsets() rejects a busy set, and
+ * EPERM when at least one card may not be released by tag.
+ */
+int ORSLRelease0(const int n, const int *__restrict inds,
+                const ORSLBusySet *__restrict bsets,
+                const ORSLTag __restrict tag)
+{
+    int idx;
+    int status = 0;
+
+    if (n == 0)
+        return 0;
+    if (check_args(n, inds, bsets, tag) != 0)
+        return EINVAL;
+    if (check_bsets(n, bsets) != 0)
+        return ENOSYS;
+
+    state_lock();
+
+    /* Verify the whole request before touching any counter. */
+    for (idx = 0; idx < n; idx++) {
+        if (can_release_card(inds[idx], &bsets[idx], tag) != 0) {
+            status = EPERM;
+            break;
+        }
+    }
+
+    if (status == 0) {
+        for (idx = 0; idx < n; idx++)
+            release_card(inds[idx], &bsets[idx], tag);
+
+        /* Let blocked reservers know that resources were freed. */
+        state_signal_release();
+    }
+
+    state_unlock();
+    return status;
+}
+
+/* vim:set et: */

Added: openmp/trunk/offload/src/orsl-lite/version.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/orsl-lite/version.txt?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/orsl-lite/version.txt (added)
+++ openmp/trunk/offload/src/orsl-lite/version.txt Wed Apr  9 10:40:23 2014
@@ -0,0 +1 @@
+ORSL-lite 0.7

Added: openmp/trunk/offload/src/rdtsc.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/rdtsc.h?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/rdtsc.h (added)
+++ openmp/trunk/offload/src/rdtsc.h Wed Apr  9 10:40:23 2014
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+/* Reads the x86 time-stamp counter with the RDTSC instruction.
+ *
+ * RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX; the two
+ * halves are combined into one 64-bit value.
+ *
+ * Declared static inline because this definition lives in a header: with
+ * external linkage, including the header from more than one translation
+ * unit would produce duplicate-symbol errors at link time.
+ */
+static inline uint64_t _rdtsc()
+{
+  uint32_t eax, edx;
+  /* __asm__ is also accepted in strict ISO modes, where plain "asm" is not. */
+  __asm__ __volatile__ ("rdtsc" : "=a" (eax), "=d" (edx));
+  return ((uint64_t)edx << 32) | eax;
+}

Added: openmp/trunk/offload/src/use_mpss2.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/use_mpss2.txt?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/use_mpss2.txt (added)
+++ openmp/trunk/offload/src/use_mpss2.txt Wed Apr  9 10:40:23 2014
@@ -0,0 +1 @@
+2.1.6720-13

Added: openmp/trunk/offload/src/use_mpss_win.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/offload/src/use_mpss_win.txt?rev=205909&view=auto
==============================================================================
--- openmp/trunk/offload/src/use_mpss_win.txt (added)
+++ openmp/trunk/offload/src/use_mpss_win.txt Wed Apr  9 10:40:23 2014
@@ -0,0 +1 @@
+2.1.6720-13

Modified: openmp/trunk/www/index.html
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/www/index.html?rev=205909&r1=205908&r2=205909&view=diff
==============================================================================
--- openmp/trunk/www/index.html (original)
+++ openmp/trunk/www/index.html Wed Apr  9 10:40:23 2014
@@ -49,6 +49,18 @@
     keep the different sites in sync.
   </p>
 
+  <p>Support for the parts of the OpenMP 4.0 language that are not
+  associated with the "target" constructs is contained in the
+  "runtime" directory. Support for offloading computation via the
+  "target" directive is in the separate "offload" directory. That
+  builds a library that provides the interfaces for transferring code
+  and data to an attached computational device. Initial support here
+  is for the Intel&reg; Xeon Phi&trade; coprocessor, but work is
+  beginning to support other attached computing devices, and the
+  design is intended to be general. The README.txt in the "offload"
+  directory describes how to build the offload library.
+  </p>
+
   <p>We are, however, still missing test code. Any contributions in
     that area would be greatly appreciated!
   </p>





More information about the Openmp-commits mailing list