From openmp-commits at lists.llvm.org Wed Jul 2 04:55:47 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Wed, 02 Jul 2025 04:55:47 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <68651e43.050a0220.229596.10e3@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 1/2] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * It implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches in the current query routines. * It supports an extension to define interop callbacks for library cleanup. 
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 2/2] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; From openmp-commits at lists.llvm.org Wed Jul 2 04:58:21 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Wed, 02 Jul 2025 04:58:21 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <68651edd.050a0220.157535.079c@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- offload/include/PerThreadTable.h offload/include/OpenMP/InteropAPI.h offload/include/OpenMP/omp.h offload/include/PluginManager.h offload/include/Shared/APITypes.h offload/libomptarget/OffloadRTL.cpp offload/libomptarget/OpenMP/API.cpp offload/libomptarget/OpenMP/InteropAPI.cpp offload/libomptarget/PluginManager.cpp offload/plugins-nextgen/common/include/PluginInterface.h openmp/runtime/src/kmp.h openmp/runtime/src/kmp_barrier.cpp openmp/runtime/src/kmp_runtime.cpp openmp/runtime/src/kmp_tasking.cpp ``````````
View the diff from clang-format here. ``````````diff diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f8580..2ecf3b599 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,7 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " + DP("Warning: nowait flag on interop use with dependences not supported " + "yet. " "Ignored\n"); nowait = false; } @@ -338,7 +339,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +359,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, ``````````
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 2 05:13:53 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Wed, 02 Jul 2025 05:13:53 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <68652281.170a0220.179c63.fc7e@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 1/3] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * It implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend-related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches in the current query routines. * It supports an extension to define interop callbacks for library cleanup.
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 2/3] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 3/3] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, From openmp-commits at lists.llvm.org Wed Jul 2 22:58:04 2025 From: openmp-commits at lists.llvm.org (Bernhard M. Wiedemann via Openmp-commits) Date: Wed, 02 Jul 2025 22:58:04 -0700 (PDT) Subject: [Openmp-commits] [openmp] Drop timestamp in generated source code (PR #146822) Message-ID: https://github.com/bmwiedemann created https://github.com/llvm/llvm-project/pull/146822 Fixes #72206 This helps reproducible builds of libomp.so -- probably because LLVM's LTO computed a hash of inputs to generate its symbol names. note: if it is desired to keep the timestamp, we could instead use [`SOURCE_DATE_EPOCH`](https://reproducible-builds.org/docs/source-date-epoch/) to make it deterministic. This PR was done while working on [reproducible builds for openSUSE](https://en.opensuse.org/openSUSE:Reproducible_Builds). >From d11da8017ba6e7bb925a8997b7c488243a26a537 Mon Sep 17 00:00:00 2001 From: "Bernhard M. 
Wiedemann" Date: Thu, 3 Jul 2025 07:50:30 +0200 Subject: [PATCH] Drop timestamp in generated source code Fixes #72206 This helps reproducible builds of libomp.so probably because LLVM's LTO computed a hash of inputs to generate its symbol names. This patch was done while working on reproducible builds for openSUSE. --- openmp/runtime/tools/message-converter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/openmp/runtime/tools/message-converter.py b/openmp/runtime/tools/message-converter.py index a493d64c1692d..5e2aeba6c99d6 100644 --- a/openmp/runtime/tools/message-converter.py +++ b/openmp/runtime/tools/message-converter.py @@ -11,7 +11,6 @@ # import argparse -import datetime import os import platform import re @@ -188,11 +187,10 @@ def insert_header(f, data, commentChar="//"): f.write( "{0} Do not edit this file! {0}\n" "{0} The file was generated from" - " {1} by {2} on {3}. {0}\n\n".format( + " {1} by {2}. {0}\n\n".format( commentChar, os.path.basename(data.filename), os.path.basename(__file__), - datetime.datetime.now().ctime(), ) ) From openmp-commits at lists.llvm.org Wed Jul 2 22:58:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Wed, 02 Jul 2025 22:58:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] Drop timestamp in generated source code (PR #146822) In-Reply-To: Message-ID: <68661c00.630a0220.129a82.431d@mx.google.com> github-actions[bot] wrote: Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. 
If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html). You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/). https://github.com/llvm/llvm-project/pull/146822 From openmp-commits at lists.llvm.org Thu Jul 3 08:25:36 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 03 Jul 2025 08:25:36 -0700 (PDT) Subject: [Openmp-commits] [openmp] Drop timestamp in generated source code (PR #146822) In-Reply-To: Message-ID: <6866a0f0.170a0220.3a4e43.d59b@mx.google.com> https://github.com/jprotze commented: Lgtm. Would still give @jpeyton52 a chance to look at this change. https://github.com/llvm/llvm-project/pull/146822 From openmp-commits at lists.llvm.org Thu Jul 3 08:27:39 2025 From: openmp-commits at lists.llvm.org (Jonathan Peyton via Openmp-commits) Date: Thu, 03 Jul 2025 08:27:39 -0700 (PDT) Subject: [Openmp-commits] [openmp] Drop timestamp in generated source code (PR #146822) In-Reply-To: Message-ID: <6866a16b.170a0220.3c1fad.dc1a@mx.google.com> https://github.com/jpeyton52 approved this pull request. https://github.com/llvm/llvm-project/pull/146822 From openmp-commits at lists.llvm.org Thu Jul 3 08:30:37 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 03 Jul 2025 08:30:37 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <6866a21d.170a0220.2e264d.dd63@mx.google.com> https://github.com/jprotze approved this pull request. 
Lgtm https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Thu Jul 3 09:18:50 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 03 Jul 2025 09:18:50 -0700 (PDT) Subject: [Openmp-commits] [openmp] b6e113a - Drop timestamp in generated source code (#146822) Message-ID: <6866ad6a.170a0220.205d56.e945@mx.google.com> Author: Bernhard M. Wiedemann Date: 2025-07-03T18:18:47+02:00 New Revision: b6e113a149bfa196a90a260f69f06b0070bd3670 URL: https://github.com/llvm/llvm-project/commit/b6e113a149bfa196a90a260f69f06b0070bd3670 DIFF: https://github.com/llvm/llvm-project/commit/b6e113a149bfa196a90a260f69f06b0070bd3670.diff LOG: Drop timestamp in generated source code (#146822) Fixes #72206 This helps reproducible builds of libomp.so -- probably because LLVM's LTO computed a hash of inputs to generate its symbol names. note: if it is desired to keep the timestamp, we could instead use [`SOURCE_DATE_EPOCH`](https://reproducible-builds.org/docs/source-date-epoch/) to make it deterministic. This PR was done while working on [reproducible builds for openSUSE](https://en.opensuse.org/openSUSE:Reproducible_Builds). Added: Modified: openmp/runtime/tools/message-converter.py Removed: ################################################################################ diff --git a/openmp/runtime/tools/message-converter.py b/openmp/runtime/tools/message-converter.py index a493d64c1692d..5e2aeba6c99d6 100644 --- a/openmp/runtime/tools/message-converter.py +++ b/openmp/runtime/tools/message-converter.py @@ -11,7 +11,6 @@ # import argparse -import datetime import os import platform import re @@ -188,11 +187,10 @@ def insert_header(f, data, commentChar="//"): f.write( "{0} Do not edit this file! {0}\n" "{0} The file was generated from" - " {1} by {2} on {3}. {0}\n\n".format( + " {1} by {2}. 
{0}\n\n".format( commentChar, os.path.basename(data.filename), os.path.basename(__file__), - datetime.datetime.now().ctime(), ) ) From openmp-commits at lists.llvm.org Thu Jul 3 09:18:53 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 03 Jul 2025 09:18:53 -0700 (PDT) Subject: [Openmp-commits] [openmp] Drop timestamp in generated source code (PR #146822) In-Reply-To: Message-ID: <6866ad6d.170a0220.195c3d.fda7@mx.google.com> https://github.com/jprotze closed https://github.com/llvm/llvm-project/pull/146822 From openmp-commits at lists.llvm.org Thu Jul 3 09:19:12 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 03 Jul 2025 09:19:12 -0700 (PDT) Subject: [Openmp-commits] [openmp] Drop timestamp in generated source code (PR #146822) In-Reply-To: Message-ID: <6866ad80.630a0220.2bae.0c47@mx.google.com> github-actions[bot] wrote: @bmwiedemann Congratulations on having your first Pull Request (PR) merged into the LLVM Project! Your changes will be combined with recent changes from other authors, then tested by our [build bots](https://lab.llvm.org/buildbot/). If there is a problem with a build, you may receive a report in an email or a comment on this PR. Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues. How to do this, and the rest of the post-merge process, is covered in detail [here](https://llvm.org/docs/MyFirstTypoFix.html#myfirsttypofix-issues-after-landing-your-pr). If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of [LLVM development](https://llvm.org/docs/DeveloperPolicy.html#patch-reversion-policy). You can fix your changes and open a new PR to merge them again. If you don't get any reports, no action is required from you. 
Your changes are working as expected, well done! https://github.com/llvm/llvm-project/pull/146822 From openmp-commits at lists.llvm.org Thu Jul 3 10:02:42 2025 From: openmp-commits at lists.llvm.org (Jonathan Peyton via Openmp-commits) Date: Thu, 03 Jul 2025 10:02:42 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <6866b7b2.050a0220.2b102a.131f@mx.google.com> https://github.com/jpeyton52 approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Mon Jul 7 12:48:26 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Mon, 07 Jul 2025 12:48:26 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686c248a.a70a0220.e6de0.b5aa@mx.google.com> mhalk wrote: While being aware that this change is of substantial size, we chose to start the reviewing process to gather feedback. If it is desired to provide this change in smaller chunks, please provide suggestions. Also, please pull in other reviewers as needed. https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 7 12:55:20 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 07 Jul 2025 12:55:20 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #146403) In-Reply-To: Message-ID: <686c2628.170a0220.a997e.035a@mx.google.com> jprotze wrote: I'm wondering whether the runtime code is sufficient to implement 6.0 runtime error behavior for the strict modifier. As I understand the runtime change in this PR, it only catches the case for serialized parallel regions. I think, runtime error termination should catch whenever requested nthreads < effective nthreads. 
It could also be the case due to max-active-levels-var, or exceeding the thread-limit. At the end, the reason doesn't really matter. The error from the build bot is confusing without having more output available from the error case. https://github.com/llvm/llvm-project/pull/146403 From openmp-commits at lists.llvm.org Mon Jul 7 13:02:02 2025 From: openmp-commits at lists.llvm.org (Shilei Tian via Openmp-commits) Date: Mon, 07 Jul 2025 13:02:02 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #146403) In-Reply-To: Message-ID: <686c27ba.630a0220.3c4bbe.b2d9@mx.google.com> shiltian wrote: https://github.com/llvm/llvm-project/pull/85466 should be the one that supports other cases? https://github.com/llvm/llvm-project/pull/146403 From openmp-commits at lists.llvm.org Mon Jul 7 13:03:44 2025 From: openmp-commits at lists.llvm.org (Robert Imschweiler via Openmp-commits) Date: Mon, 07 Jul 2025 13:03:44 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #146403) In-Reply-To: Message-ID: <686c2820.050a0220.328041.025e@mx.google.com> ro-i wrote: > I'm wondering whether the runtime code is sufficient to implement 6.0 runtime error behavior for the strict modifier. As I understand the runtime change in this PR, it only catches the case for serialized parallel regions. The other cases are already covered by https://github.com/llvm/llvm-project/pull/85466/files https://github.com/llvm/llvm-project/pull/146403 From openmp-commits at lists.llvm.org Mon Jul 7 14:56:16 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Mon, 07 Jul 2025 14:56:16 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686c4280.170a0220.1fd935.9f48@mx.google.com> jplehr wrote: It seems we forgot to add the appropriate license statement in the source files. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 8 05:16:29 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 05:16:29 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <686d0c1d.170a0220.9283c.c5b8@mx.google.com> https://github.com/jprotze dismissed https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Tue Jul 8 05:16:32 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 05:16:32 -0700 (PDT) Subject: [Openmp-commits] [openmp] 0f391d6 - [OpenMP] OpenMP ThreadSet clause - basic runtime (#144409) Message-ID: <686d0c20.170a0220.9aa4.c0a6@mx.google.com> Author: Ritanya-B-Bharadwaj Date: 2025-07-08T14:16:29+02:00 New Revision: 0f391d6f51217de5cb6735b17f359eb078bbe94e URL: https://github.com/llvm/llvm-project/commit/0f391d6f51217de5cb6735b17f359eb078bbe94e DIFF: https://github.com/llvm/llvm-project/commit/0f391d6f51217de5cb6735b17f359eb078bbe94e.diff LOG: [OpenMP] OpenMP ThreadSet clause - basic runtime (#144409) Initial runtime support for threadset clause in task and taskloop directives [Section 14.8 in in OpenMP 6.0 spec] Frontend PR- https://github.com/llvm/llvm-project/pull/135807 Added: Modified: openmp/runtime/src/kmp.h Removed: ################################################################################ diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..f62cabee6ea84 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2725,11 +2725,12 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) /* Same fields as in the #else branch, but in reverse order */ #if OMPX_TASKGRAPH - unsigned reserved31 : 5; + unsigned reserved31 : 4; unsigned onced : 1; #else - unsigned reserved31 : 6; 
+ unsigned reserved31 : 5; #endif + unsigned hidden_helper : 1; unsigned target : 1; unsigned native : 1; unsigned freed : 1; @@ -2741,7 +2742,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned task_serial : 1; unsigned tasktype : 1; unsigned reserved : 8; - unsigned hidden_helper : 1; + unsigned free_agent_eligible : 1; unsigned detachable : 1; unsigned priority_specified : 1; unsigned proxy : 1; @@ -2762,7 +2763,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned priority_specified : 1; /* set if the compiler provides priority setting for the task */ unsigned detachable : 1; /* 1 == can detach */ - unsigned hidden_helper : 1; /* 1 == hidden helper task */ + unsigned free_agent_eligible : 1; /* set if task can be executed by a + free-agent thread */ unsigned reserved : 8; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ @@ -2780,11 +2782,12 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned freed : 1; /* 1==freed, 0==allocated */ unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ unsigned target : 1; + unsigned hidden_helper : 1; /* 1 == hidden helper task */ #if OMPX_TASKGRAPH unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay purposes */ - unsigned reserved31 : 5; /* reserved for library use */ + unsigned reserved31 : 4; /* reserved for library use */ #else - unsigned reserved31 : 6; /* reserved for library use */ + unsigned reserved31 : 5; /* reserved for library use */ #endif #endif } kmp_tasking_flags_t; From openmp-commits at lists.llvm.org Tue Jul 8 05:16:36 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 05:16:36 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <686d0c24.170a0220.3520b4.8764@mx.google.com> https://github.com/jprotze closed 
https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Tue Jul 8 05:19:40 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 05:19:40 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <686d0cdc.170a0220.3328d5.2b59@mx.google.com> Ritanya-B-Bharadwaj wrote: @jprotze the frontend PR for this feature is still open - https://github.com/llvm/llvm-project/pull/135807 Can you please review it? https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Tue Jul 8 05:21:16 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 05:21:16 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] [NFC] Remove dead code: building task stack (PR #143589) In-Reply-To: Message-ID: <686d0d3c.170a0220.21f6a9.be76@mx.google.com> https://github.com/jprotze approved this pull request. Lgtm All removed code is guarded with `BUILD_TIED_TASK_STACK` and the PR removes all uses of this macro https://github.com/llvm/llvm-project/pull/143589 From openmp-commits at lists.llvm.org Tue Jul 8 05:45:47 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 08 Jul 2025 05:45:47 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686d12fb.170a0220.63aab.333f@mx.google.com> https://github.com/mhalk updated https://github.com/llvm/llvm-project/pull/147381 >From f024da45acd71e4dfe65d3d9cc6b86cf9cb7cc69 Mon Sep 17 00:00:00 2001 From: Michael Halkenhaeuser Date: Wed, 2 Jul 2025 05:32:04 -0500 Subject: [PATCH] [OpenMP] Add ompTest library to OpenMP Description =========== OpenMP Tooling Interface Testing Library (ompTest) ompTest is a unit testing framework for testing OpenMP implementations. 
It offers a simple-to-use framework that allows a tester to check for OMPT events in addition to regular unit testing code, supported by linking against GoogleTest by default. It also facilitates writing concise tests while bridging the semantic gap between the unit under test and the OMPT-event testing. Background ========== This library has been developed to provide the means of testing OMPT implementations with reasonable effort. Especially, asynchronous or unordered events are supported and can be verified with ease, which may prove to be challenging with LIT-based tests. Additionally, since the assertions are part of the code being tested, ompTest can reference all corresponding variables during assertion. Basic Usage =========== OMPT event assertions are placed before the code, which shall be tested. These assertions can either be provided as one block or interleaved with the test code. There are two types of asserters: (1) sequenced "order-sensitive" and (2) set "unordered" asserters. Once the test is being run, the corresponding events are triggered by the OpenMP runtime and can be observed. Each of these observed events notifies asserters, which then determine if the test should pass or fail. Example (partial, interleaved) ============================== int N = 100000; int a[N]; int b[N]; OMPT_ASSERT_SEQUENCE(Target, TARGET, BEGIN, 0); OMPT_ASSERT_SEQUENCE(TargetDataOp, ALLOC, N * sizeof(int)); // a ? OMPT_ASSERT_SEQUENCE(TargetDataOp, H2D, N * sizeof(int), &a); OMPT_ASSERT_SEQUENCE(TargetDataOp, ALLOC, N * sizeof(int)); // b ?
OMPT_ASSERT_SEQUENCE(TargetDataOp, H2D, N * sizeof(int), &b); OMPT_ASSERT_SEQUENCE(TargetSubmit, 1); OMPT_ASSERT_SEQUENCE(TargetDataOp, D2H, N * sizeof(int), nullptr, &b); OMPT_ASSERT_SEQUENCE(TargetDataOp, D2H, N * sizeof(int), nullptr, &a); OMPT_ASSERT_SEQUENCE(TargetDataOp, DELETE); OMPT_ASSERT_SEQUENCE(TargetDataOp, DELETE); OMPT_ASSERT_SEQUENCE(Target, TARGET, END, 0); { for (int j = 0; j < N; j++) a[j] = b[j]; } References ========== This work has been presented at SC'24 workshops, see: https://ieeexplore.ieee.org/document/10820689 Current State and Future Work ============================= ompTest's development was mostly device-centric and aimed at OMPT device callbacks and device-side tracing. Consequentially, a substantial part of host-related events or features may not be supported in its current state. However, we are confident that the related functionality can be added and ompTest provides a general foundation for future OpenMP and especially OMPT testing. This PR will allow us to upstream the corresponding features, like OMPT device-side tracing in the future with significantly reduced risk of introducing regressions in the process. Build ===== ompTest is linked against LLVM's GoogleTest by default, but can also be built 'standalone'. Additionally, it comes with a set of unit tests, which in turn require GoogleTest (overriding a standalone build). The unit tests are added to the `check-openmp` target. 
Use the following parameters to perform the corresponding build: `LIBOMPTEST_BUILD_STANDALONE` (Default: OFF) `LIBOMPTEST_BUILD_UNITTESTS` (Default: OFF) --------- Co-authored-by: Jan-Patrick Lehr --- openmp/README.rst | 1 + openmp/tools/omptest/CMakeLists.txt | 116 ++++ openmp/tools/omptest/README.md | 279 +++++++++ .../omptest/cmake/omptest-config.cmake.in | 29 + openmp/tools/omptest/include/AssertMacros.h | 138 ++++ openmp/tools/omptest/include/InternalEvent.h | 331 ++++++++++ .../omptest/include/InternalEventCommon.h | 133 ++++ openmp/tools/omptest/include/Logging.h | 155 +++++ openmp/tools/omptest/include/OmptAliases.h | 85 +++ .../tools/omptest/include/OmptAssertEvent.h | 377 +++++++++++ openmp/tools/omptest/include/OmptAsserter.h | 291 +++++++++ .../omptest/include/OmptCallbackHandler.h | 165 +++++ openmp/tools/omptest/include/OmptTester.h | 60 ++ .../tools/omptest/include/OmptTesterGlobals.h | 36 ++ .../omptest/include/OmptTesterGoogleTest.h | 86 +++ .../omptest/include/OmptTesterStandalone.h | 123 ++++ openmp/tools/omptest/src/InternalEvent.cpp | 367 +++++++++++ .../omptest/src/InternalEventOperators.cpp | 366 +++++++++++ openmp/tools/omptest/src/Logging.cpp | 177 ++++++ openmp/tools/omptest/src/OmptAssertEvent.cpp | 587 ++++++++++++++++++ openmp/tools/omptest/src/OmptAsserter.cpp | 480 ++++++++++++++ .../tools/omptest/src/OmptCallbackHandler.cpp | 445 +++++++++++++ openmp/tools/omptest/src/OmptTester.cpp | 504 +++++++++++++++ .../omptest/src/OmptTesterStandalone.cpp | 147 +++++ openmp/tools/omptest/test/CMakeLists.txt | 28 + openmp/tools/omptest/test/lit.cfg | 26 + openmp/tools/omptest/test/lit.site.cfg.in | 9 + .../test/unittests/asserter-seq-test.cpp | 358 +++++++++++ .../test/unittests/internal-event-test.cpp | 530 ++++++++++++++++ .../test/unittests/internal-util-test.cpp | 95 +++ .../omptest/test/unittests/main-test.cpp | 141 +++++ 31 files changed, 6665 insertions(+) create mode 100644 openmp/tools/omptest/CMakeLists.txt create mode 100644 
openmp/tools/omptest/README.md create mode 100644 openmp/tools/omptest/cmake/omptest-config.cmake.in create mode 100644 openmp/tools/omptest/include/AssertMacros.h create mode 100644 openmp/tools/omptest/include/InternalEvent.h create mode 100644 openmp/tools/omptest/include/InternalEventCommon.h create mode 100644 openmp/tools/omptest/include/Logging.h create mode 100644 openmp/tools/omptest/include/OmptAliases.h create mode 100644 openmp/tools/omptest/include/OmptAssertEvent.h create mode 100644 openmp/tools/omptest/include/OmptAsserter.h create mode 100644 openmp/tools/omptest/include/OmptCallbackHandler.h create mode 100644 openmp/tools/omptest/include/OmptTester.h create mode 100644 openmp/tools/omptest/include/OmptTesterGlobals.h create mode 100644 openmp/tools/omptest/include/OmptTesterGoogleTest.h create mode 100644 openmp/tools/omptest/include/OmptTesterStandalone.h create mode 100644 openmp/tools/omptest/src/InternalEvent.cpp create mode 100644 openmp/tools/omptest/src/InternalEventOperators.cpp create mode 100644 openmp/tools/omptest/src/Logging.cpp create mode 100644 openmp/tools/omptest/src/OmptAssertEvent.cpp create mode 100644 openmp/tools/omptest/src/OmptAsserter.cpp create mode 100644 openmp/tools/omptest/src/OmptCallbackHandler.cpp create mode 100644 openmp/tools/omptest/src/OmptTester.cpp create mode 100644 openmp/tools/omptest/src/OmptTesterStandalone.cpp create mode 100644 openmp/tools/omptest/test/CMakeLists.txt create mode 100644 openmp/tools/omptest/test/lit.cfg create mode 100644 openmp/tools/omptest/test/lit.site.cfg.in create mode 100644 openmp/tools/omptest/test/unittests/asserter-seq-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/internal-event-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/internal-util-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/main-test.cpp diff --git a/openmp/README.rst b/openmp/README.rst index 2dfc8630858b8..c34d3e8a40d7d 100644 --- a/openmp/README.rst +++ 
b/openmp/README.rst @@ -369,6 +369,7 @@ There are following check-* make targets for tests. - ``check-ompt`` (ompt tests under runtime/test/ompt) - ``check-ompt-multiplex`` (ompt multiplex tests under tools/multiplex/tests) +- ``check-ompt-omptest`` (ompt omptest tests under tools/omptest/tests) - ``check-libarcher`` (libarcher tests under tools/archer/tests) - ``check-libomp`` (libomp tests under runtime/test. This includes check-ompt tests too) - ``check-libomptarget-*`` (libomptarget tests for specific target under libomptarget/test) diff --git a/openmp/tools/omptest/CMakeLists.txt b/openmp/tools/omptest/CMakeLists.txt new file mode 100644 index 0000000000000..19f9f898f4300 --- /dev/null +++ b/openmp/tools/omptest/CMakeLists.txt @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." 
OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) + return() +endif() + +set(OMPTEST_HEADERS + ./include/AssertMacros.h + ./include/InternalEvent.h + ./include/InternalEventCommon.h + ./include/Logging.h + ./include/OmptAliases.h + ./include/OmptAsserter.h + ./include/OmptAssertEvent.h + ./include/OmptCallbackHandler.h + ./include/OmptTester.h + ./include/OmptTesterGlobals.h +) + +add_library(omptest + SHARED + + ${OMPTEST_HEADERS} + ./src/InternalEvent.cpp + ./src/InternalEventOperators.cpp + ./src/Logging.cpp + ./src/OmptAsserter.cpp + ./src/OmptAssertEvent.cpp + ./src/OmptCallbackHandler.cpp + ./src/OmptTester.cpp +) + +# Target: ompTest library +# On (implicit) request of GoogleTest, link against the one provided with LLVM. +if ((NOT LIBOMPTEST_BUILD_STANDALONE) OR LIBOMPTEST_BUILD_UNITTESTS) + # Check if standalone build was requested together with unittests + if (LIBOMPTEST_BUILD_STANDALONE) + # Emit warning: this build actually depends on LLVM's GoogleTest + message(WARNING "LIBOMPTEST_BUILD_STANDALONE and LIBOMPTEST_BUILD_UNITTESTS" + " requested simultaneously.\n" + "Linking against LLVM's GoogleTest library archives.\n" + "Disable LIBOMPTEST_BUILD_UNITTESTS to perform an actual" + " standalone build.") + # Explicitly disable LIBOMPTEST_BUILD_STANDALONE + set(LIBOMPTEST_BUILD_STANDALONE OFF) + endif() + + # Use LLVM's gtest library archive + set(GTEST_LIB "${LLVM_BINARY_DIR}/lib/libllvm_gtest.a") + # Link gtest as whole-archive to expose required symbols + set(GTEST_LINK_CMD "-Wl,--whole-archive" ${GTEST_LIB} + "-Wl,--no-whole-archive" LLVMSupport) + + # Add GoogleTest-based header + target_sources(omptest PRIVATE ./include/OmptTesterGoogleTest.h) + + # Add LLVM-provided GoogleTest include directories. + target_include_directories(omptest PRIVATE + ${LLVM_THIRD_PARTY_DIR}/unittest/googletest/include) + + # TODO: Re-visit ABI breaking checks, disable for now. 
+ target_compile_definitions(omptest PUBLIC + -DLLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING) + + # Link against gtest and gtest_main + target_link_libraries(omptest PRIVATE ${GTEST_LINK_CMD}) +else() + # Add 'standalone' compile definitions + target_compile_definitions(omptest PRIVATE + -DOPENMP_LIBOMPTEST_BUILD_STANDALONE) + + # Add 'standalone' source files + target_sources(omptest PRIVATE + ./include/OmptTesterStandalone.h + ./src/OmptTesterStandalone.cpp) +endif() + +# Add common include directories. +target_include_directories(omptest PRIVATE + ./include + ${LIBOMPTARGET_INCLUDE_DIR}) +target_compile_features(omptest PRIVATE cxx_std_17) + +# Create and install package configuration files. +configure_file( + ${omptest_SOURCE_DIR}/cmake/omptest-config.cmake.in + ${omptest_BINARY_DIR}/cmake/omptest-config.cmake @ONLY) + +install(FILES ${omptest_BINARY_DIR}/cmake/omptest-config.cmake + DESTINATION "${OPENMP_INSTALL_LIBDIR}/cmake/openmp/omptest") + +# Install libomptest header files: Copy header-files from include dir +install(DIRECTORY ./include + DESTINATION "${LIBOMP_HEADERS_INSTALL_PATH}/omptest" + FILES_MATCHING PATTERN "*.h") + +install(TARGETS omptest LIBRARY COMPONENT omptest + DESTINATION "${OPENMP_INSTALL_LIBDIR}") + +# Discover unit tests (added to check-openmp) +if(LIBOMPTEST_BUILD_UNITTESTS) + add_subdirectory(test) +endif() diff --git a/openmp/tools/omptest/README.md b/openmp/tools/omptest/README.md new file mode 100644 index 0000000000000..bfed871b59bdb --- /dev/null +++ b/openmp/tools/omptest/README.md @@ -0,0 +1,279 @@ + +README for the OpenMP Tooling Interface Testing Library (ompTest) +================================================================= + +# Introduction +OpenMP Tooling Interface Testing Library (ompTest) +ompTest is a unit testing framework for testing OpenMP implementations. 
+It offers a simple-to-use framework that allows a tester to check for OMPT +events in addition to regular unit testing code, supported by linking against +GoogleTest by default. It also facilitates writing concise tests while bridging +the semantic gap between the unit under test and the OMPT-event testing. + +# Testing macros + +Corresponding macro definitions are located in: `./include/AssertMacros.h` + +## OMPT_GENERATE_EVENTS(NumberOfCopies, EventMacro) +`TODO` + +## OMPT_ASSERT_SET_EVENT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_GROUPED(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NAMED(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_EVENT_NOT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NOT(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_GROUPED_NOT(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NAMED_NOT(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_EVENT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE(EventTy, ...) +This macro checks for the occurrence of the provided event, which also +entails the exact sequence of events. When only using this assertion macro one +has to provide every single event in the exact order of occurrence. + +## OMPT_ASSERT_SEQUENCE_GROUPED(Group, EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE` with the addition of grouping. + +## OMPT_ASSERT_SEQUENCE_NAMED(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_NOT(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_GROUPED_NOT(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_NAMED_NOT(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_SUSPEND() +`TODO` + +## OMPT_ASSERT_SEQUENCE_ONLY(EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE`, while actually being preceded +-AND- succeeded by commands to suspend sequenced assertion until the next match. 
+As a result, one may omit all other "unnecessary" events from the sequence. + +## OMPT_ASSERT_SEQUENCE_GROUPED_ONLY(Group, EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE_ONLY`, plus grouping. + +## OMPT_ASSERT_SEQUENCE_NAMED_ONLY(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERTER_MODE_STRICT(Asserter) +`TODO` + +## OMPT_ASSERTER_MODE_RELAXED(Asserter) +`TODO` + +## OMPT_ASSERT_SEQUENCE_MODE_STRICT() +`TODO` + +## OMPT_ASSERT_SEQUENCE_MODE_RELAXED() +`TODO` + +## OMPT_ASSERT_SET_MODE_STRICT() +`TODO` + +## OMPT_ASSERT_SET_MODE_RELAXED() +`TODO` + +## OMPT_ASSERTER_DISABLE(Asserter) +`TODO` + +## OMPT_ASSERTER_ENABLE(Asserter) +`TODO` + +## OMPT_ASSERT_SET_DISABLE() +`TODO` + +## OMPT_ASSERT_SET_ENABLE() +`TODO` + +## OMPT_ASSERT_SEQUENCE_DISABLE() +`TODO` + +## OMPT_ASSERT_SEQUENCE_ENABLE() +`TODO` + +## OMPT_REPORT_EVENT_DISABLE() +`TODO` + +## OMPT_REPORT_EVENT_ENABLE() +`TODO` + +## OMPT_ASSERTER_PERMIT_EVENT(Asserter, EventTy) +`TODO` + +## OMPT_ASSERTER_SUPPRESS_EVENT(Asserter, EventTy) +`TODO` + +## OMPT_PERMIT_EVENT(EventTy) +`TODO` + +## OMPT_SUPPRESS_EVENT(EventTy) +`TODO` + +## OMPT_ASSERTER_LOG_LEVEL(Asserter, LogLevel) +`TODO` + +## OMPT_ASSERTER_LOG_FORMATTED(Asserter, FormatLog) +`TODO` + +## OMPT_ASSERT_SYNC_POINT(SyncPointName) +`TODO` + +### Grouping Asserts + +This allows generating and verifying data during runtime of a test. +Currently, we only use target region information which manifests into groups. +This allows correlating multiple events to a certain target region without +manual interaction just by specifying a groupname for these events. + +When a target region is encountered and we are about to enter it, we gather the +`target_id` (non-EMI) -OR- `target_data->value` (EMI). This value is stored +along with the groupname for future reference. Upon target region end, the +corresponding group is erased. (Note: The groupname is available again.)
+ +Other asserted callbacks which may occur within target regions query their +groupname: retrieving and comparing the value of the group against the observed +event's value. + +### Suspending Sequenced Asserts + +When a sequence of events is not of interest while testing, these additional +events may be ignored by suspending the assertion until the next match. This +can be done by using `OMPT_ASSERT_SEQUENCE_SUSPEND` manually or the `_ONLY` +macro variants, like `OMPT_ASSERT_SEQUENCE_GROUPED_ONLY`. + +The former adds a special event to the queue of expected events and signals +that any non-matching event should be ignored rather than failing the test. +`_ONLY` macros embed their corresponding macro between two calls to +`OMPT_ASSERT_SEQUENCE_SUSPEND`. As a consequence, we enter passive assertion +until a match occurs, then enter passive assertion again. This enables us to +"only" assert a certain, single event in arbitrary circumstances. + +### Asserter Modes +`TODO` + +## Aliases (shorthands) +To allow for easier writing of tests and enhanced readability, the following set +of aliases is introduced. The left hand side represents the original value, +while the right hand side depicts the shorthand version.
+ +| Type | Enum Value | Shorthand | +|---------------------------|---------------------------------------------|---------------------------| +| **ompt_scope_endpoint_t** | | | +| | ompt_scope_begin | BEGIN | +| | ompt_scope_end | END | +| | ompt_scope_beginend | BEGINEND | +| **ompt_target_t** | | | +| | ompt_target | TARGET | +| | ompt_target_enter_data | ENTER_DATA | +| | ompt_target_exit_data | EXIT_DATA | +| | ompt_target_update | UPDATE | +| | ompt_target_nowait | TARGET_NOWAIT | +| | ompt_target_enter_data_nowait | ENTER_DATA_NOWAIT | +| | ompt_target_exit_data_nowait | EXIT_DATA_NOWAIT | +| | ompt_target_update_nowait | UPDATE_NOWAIT | +| **ompt_target_data_op_t** | | | +| | ompt_target_data_alloc | ALLOC | +| | ompt_target_data_transfer_to_device | H2D | +| | ompt_target_data_transfer_from_device | D2H | +| | ompt_target_data_delete | DELETE | +| | ompt_target_data_associate | ASSOCIATE | +| | ompt_target_data_disassociate | DISASSOCIATE | +| | ompt_target_data_alloc_async | ALLOC_ASYNC | +| | ompt_target_data_transfer_to_device_async | H2D_ASYNC | +| | ompt_target_data_transfer_from_device_async | D2H_ASYNC | +| | ompt_target_data_delete_async | DELETE_ASYNC | +| **ompt_callbacks_t** | | | +| | ompt_callback_target | CB_TARGET | +| | ompt_callback_target_data_op | CB_DATAOP | +| | ompt_callback_target_submit | CB_KERNEL | +| **ompt_work_t** | | | +| | ompt_work_loop | WORK_LOOP | +| | ompt_work_sections | WORK_SECT | +| | ompt_work_single_executor | WORK_EXEC | +| | ompt_work_single_other | WORK_SINGLE | +| | ompt_work_workshare | WORK_SHARE | +| | ompt_work_distribute | WORK_DIST | +| | ompt_work_taskloop | WORK_TASK | +| | ompt_work_scope | WORK_SCOPE | +| | ompt_work_loop_static | WORK_LOOP_STA | +| | ompt_work_loop_dynamic | WORK_LOOP_DYN | +| | ompt_work_loop_guided | WORK_LOOP_GUI | +| | ompt_work_loop_other | WORK_LOOP_OTH | +| **ompt_sync_region_t** | | | +| | ompt_sync_region_barrier | SR_BARRIER | +| | ompt_sync_region_barrier_implicit | 
SR_BARRIER_IMPL | +| | ompt_sync_region_barrier_explicit | SR_BARRIER_EXPL | +| | ompt_sync_region_barrier_implementation | SR_BARRIER_IMPLEMENTATION | +| | ompt_sync_region_taskwait | SR_TASKWAIT | +| | ompt_sync_region_taskgroup | SR_TASKGROUP | +| | ompt_sync_region_reduction | SR_REDUCTION | +| | ompt_sync_region_barrier_implicit_workshare | SR_BARRIER_IMPL_WORKSHARE | +| | ompt_sync_region_barrier_implicit_parallel | SR_BARRIER_IMPL_PARALLEL | +| | ompt_sync_region_barrier_teams | SR_BARRIER_TEAMS | + + +Limitations +=========== +Currently, there are some peculiarities which have to be kept in mind when using +this library: + +## Callbacks + * It is not possible to e.g. test non-EMI -AND- EMI callbacks within the same + test file. Reason: all testsuites share the initialization and therefore the + registered callbacks. + * It is not possible to check for device initialization and/or load callbacks + more than once per test file. The first testcase being run triggers these + callbacks and is therefore the only testcase that is able to check for them. + This is because, after that, the device remains initialized. + * It is not possible to check for device finalization callbacks, as libomptest + is un-loaded before this callback occurs. Same holds true for the final + ThreadEnd event(s). + +Miscellaneous +============= + +## Default values + +To allow for easier writing of tests, many OMPT events may be created using fewer +parameters than actually required by the spec -- by using default values. These +defaults are currently set to the corresponding data type's minimum as follows, +for example integers use: `std::numeric_limits<int>::min()`. + +When an expected / user-specified event has certain values set to the +corresponding default, these values are ignored. That is, when compared to an +observed event, this property is considered as 'equal' regardless of their +actual equality relation.
+ +References +========== +[0]: ompTest – Unit Testing with OMPT + https://doi.org/10.1109/SCW63240.2024.00031 + +[1]: OMPTBench – OpenMP Tool Interface Conformance Testing + https://doi.org/10.1109/SCW63240.2024.00036 diff --git a/openmp/tools/omptest/cmake/omptest-config.cmake.in b/openmp/tools/omptest/cmake/omptest-config.cmake.in new file mode 100644 index 0000000000000..dca02505539b0 --- /dev/null +++ b/openmp/tools/omptest/cmake/omptest-config.cmake.in @@ -0,0 +1,29 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package. +## +################################################################################ + +# Compute installation prefix relative to this file. +get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) + +# Provide header and library paths. +set(LIBOMP_HEADERS_INSTALL_PATH "${LLVM_INSTALL_PREFIX}/@LIBOMP_HEADERS_INSTALL_PATH@") +set(LIBOMP_LIBRARY_INSTALL_PATH "${LLVM_INSTALL_PREFIX}/@OPENMP_INSTALL_LIBDIR@") +set(omptest_INCLUDE_DIR "${LIBOMP_HEADERS_INSTALL_PATH}/omptest/include") +set(omptest_LIBRARY_DIR "${LIBOMP_LIBRARY_INSTALL_PATH}") + +# Provide compiler default values. +set(LLVM_BIN_INSTALL_DIR "${LLVM_INSTALL_PREFIX}/bin") +set(omptest_C_COMPILER "${LLVM_BIN_INSTALL_DIR}/clang") +set(omptest_CXX_COMPILER "${LLVM_BIN_INSTALL_DIR}/clang++") + +# Provide information, if ompTest has been built 'standalone'. 
+set(LIBOMPTEST_BUILD_STANDALONE "@LIBOMPTEST_BUILD_STANDALONE@") diff --git a/openmp/tools/omptest/include/AssertMacros.h b/openmp/tools/omptest/include/AssertMacros.h new file mode 100644 index 0000000000000..d5d191c10dabb --- /dev/null +++ b/openmp/tools/omptest/include/AssertMacros.h @@ -0,0 +1,138 @@ +//===- AssertMacros.h - Macro aliases for ease-of-use -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides macros to be used in unit tests for OMPT events. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_ASSERTMACROS_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_ASSERTMACROS_H + +#define OMPTEST_EXCLUDED_EVENT omptest::ObserveState::never +#define OMPTEST_REQUIRED_EVENT omptest::ObserveState::always + +/// ASSERT MACROS TO BE USED BY THE USER + +#define OMPT_GENERATE_EVENTS(NumberOfCopies, EventMacro) \ + for (size_t i = 0; i < NumberOfCopies; ++i) { \ + EventMacro \ + } + +// Handle a minimum unordered set of events +// Required events +#define OMPT_ASSERT_SET_EVENT(Name, Group, EventTy, ...) \ + SetAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_REQUIRED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SET(EventTy, ...) \ + OMPT_ASSERT_SET_EVENT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_GROUPED(Group, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_NAMED(Name, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT(Name, "", EventTy, __VA_ARGS__) +// Excluded ("NOT") events +#define OMPT_ASSERT_SET_EVENT_NOT(Name, Group, EventTy, ...) 
\ + SetAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_EXCLUDED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SET_NOT(EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_GROUPED_NOT(Group, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_NAMED_NOT(Name, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT(Name, "", EventTy, __VA_ARGS__) + +// Handle an exact sequence of events +// Required events +#define OMPT_ASSERT_SEQUENCE_EVENT(Name, Group, EventTy, ...) \ + SequenceAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_REQUIRED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SEQUENCE(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_GROUPED(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_NAMED(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT(Name, "", EventTy, __VA_ARGS__) +// Excluded ("NOT") events +#define OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, Group, EventTy, ...) \ + SequenceAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_EXCLUDED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SEQUENCE_NOT(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_GROUPED_NOT(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_NAMED_NOT(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, "", EventTy, __VA_ARGS__) +// Special command: suspend active assertion +// The created event is not correlated to any observed event +#define OMPT_ASSERT_SEQUENCE_SUSPEND() \ + SequenceAsserter->insert( \ + OmptAssertEvent::AssertionSuspend("", "", OMPTEST_EXCLUDED_EVENT)); +#define OMPT_ASSERT_SEQUENCE_ONLY(EventTy, ...) 
\ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT("", "", EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() +#define OMPT_ASSERT_SEQUENCE_GROUPED_ONLY(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT("", Group, EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() +#define OMPT_ASSERT_SEQUENCE_NAMED_ONLY(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT(Name, "", EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() + +#define OMPT_ASSERTER_MODE_STRICT(Asserter) \ + Asserter->setOperationMode(AssertMode::strict); +#define OMPT_ASSERTER_MODE_RELAXED(Asserter) \ + Asserter->setOperationMode(AssertMode::relaxed); +#define OMPT_ASSERT_SEQUENCE_MODE_STRICT() \ + OMPT_ASSERTER_MODE_STRICT(SequenceAsserter) +#define OMPT_ASSERT_SEQUENCE_MODE_RELAXED() \ + OMPT_ASSERTER_MODE_RELAXED(SequenceAsserter) +#define OMPT_ASSERT_SET_MODE_STRICT() OMPT_ASSERTER_MODE_STRICT(SetAsserter) +#define OMPT_ASSERT_SET_MODE_RELAXED() OMPT_ASSERTER_MODE_RELAXED(SetAsserter) + +// Enable / disable asserters entirely +#define OMPT_ASSERTER_DISABLE(Asserter) Asserter->setActive(false); +#define OMPT_ASSERTER_ENABLE(Asserter) Asserter->setActive(true); +#define OMPT_ASSERT_SET_DISABLE() OMPT_ASSERTER_DISABLE(SetAsserter) +#define OMPT_ASSERT_SET_ENABLE() OMPT_ASSERTER_ENABLE(SetAsserter) +#define OMPT_ASSERT_SEQUENCE_DISABLE() OMPT_ASSERTER_DISABLE(SequenceAsserter) +#define OMPT_ASSERT_SEQUENCE_ENABLE() OMPT_ASSERTER_ENABLE(SequenceAsserter) +#define OMPT_REPORT_EVENT_DISABLE() OMPT_ASSERTER_DISABLE(EventReporter) +#define OMPT_REPORT_EVENT_ENABLE() OMPT_ASSERTER_ENABLE(EventReporter) + +// Enable / disable certain event types for asserters +#define OMPT_ASSERTER_PERMIT_EVENT(Asserter, EventTy) \ + Asserter->permitEvent(EventTy); +#define OMPT_ASSERTER_SUPPRESS_EVENT(Asserter, EventTy) \ + Asserter->suppressEvent(EventTy); +#define OMPT_PERMIT_EVENT(EventTy) \ + 
OMPT_ASSERTER_PERMIT_EVENT(SetAsserter, EventTy); \ + OMPT_ASSERTER_PERMIT_EVENT(EventReporter, EventTy); \ + OMPT_ASSERTER_PERMIT_EVENT(SequenceAsserter, EventTy); +#define OMPT_SUPPRESS_EVENT(EventTy) \ + OMPT_ASSERTER_SUPPRESS_EVENT(SetAsserter, EventTy); \ + OMPT_ASSERTER_SUPPRESS_EVENT(EventReporter, EventTy); \ + OMPT_ASSERTER_SUPPRESS_EVENT(SequenceAsserter, EventTy); + +// Set logging level for asserters +// Note: Logger is a singleton, hence this will affect all asserter instances +#define OMPT_ASSERTER_LOG_LEVEL(Asserter, LogLevel) \ + Asserter->getLog()->setLoggingLevel(LogLevel); + +// Set log formatting (esp. coloring) for asserters +// Note: Logger is a singleton, hence this will affect all asserter instances +#define OMPT_ASSERTER_LOG_FORMATTED(Asserter, FormatLog) \ + Asserter->getLog()->setFormatOutput(FormatLog); + +// SyncPoint handling +#define OMPT_ASSERT_SYNC_POINT(SyncPointName) \ + flush_traced_devices(); \ + OmptCallbackHandler::get().handleAssertionSyncPoint(SyncPointName); + +#endif diff --git a/openmp/tools/omptest/include/InternalEvent.h b/openmp/tools/omptest/include/InternalEvent.h new file mode 100644 index 0000000000000..455d8d996e5f4 --- /dev/null +++ b/openmp/tools/omptest/include/InternalEvent.h @@ -0,0 +1,331 @@ +//===- InternalEvent.h - Internal event representation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declares internal event representations along the default CTOR definition. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H + +#include "InternalEventCommon.h" + +#include +#include +#include + +#define expectedDefault(TypeName) std::numeric_limits::min() + +namespace omptest { + +namespace util { + +/// String manipulation helper function. Takes up to 8 bytes of data and returns +/// their hexadecimal representation as string. The data can be expanded to the +/// given size in bytes and will by default be prefixed with '0x'. +std::string makeHexString(uint64_t Data, bool IsPointer = true, + size_t DataBytes = 0, bool ShowHexBase = true); + +} // namespace util + +namespace internal { +// clang-format off +event_class_w_custom_body(AssertionSyncPoint, \ + AssertionSyncPoint(const std::string &Name) \ + : InternalEvent(EventTy::AssertionSyncPoint), Name(Name) {} \ + \ + const std::string Name; \ +) +event_class_stub(AssertionSuspend) +event_class_w_custom_body(ThreadBegin, \ + ThreadBegin(ompt_thread_t ThreadType) \ + : InternalEvent(EventTy::ThreadBegin), ThreadType(ThreadType) {} \ + \ + ompt_thread_t ThreadType; \ +) +event_class_w_custom_body(ThreadEnd, \ + ThreadEnd() : InternalEvent(EventTy::ThreadEnd) {} \ +) +event_class_w_custom_body(ParallelBegin, \ + ParallelBegin(int NumThreads) \ + : InternalEvent(EventTy::ParallelBegin), NumThreads(NumThreads) {} \ + \ + unsigned int NumThreads; \ +) +event_class_w_custom_body(ParallelEnd, \ + ParallelEnd(ompt_data_t *ParallelData, ompt_data_t *EncounteringTaskData, \ + int Flags, const void *CodeptrRA) \ + : InternalEvent(EventTy::ParallelEnd), ParallelData(ParallelData), \ + EncounteringTaskData(EncounteringTaskData), Flags(Flags), \ + CodeptrRA(CodeptrRA) {} \ + \ +ompt_data_t *ParallelData; \ +ompt_data_t *EncounteringTaskData; \ +int Flags; \ +const void *CodeptrRA; \ +) +event_class_w_custom_body(Work, \ + Work(ompt_work_t WorkType, 
ompt_scope_endpoint_t Endpoint, \ + ompt_data_t *ParallelData, ompt_data_t *TaskData, uint64_t Count, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::Work), WorkType(WorkType), Endpoint(Endpoint), \ + ParallelData(ParallelData), TaskData(TaskData), Count(Count), \ + CodeptrRA(CodeptrRA) {} \ + \ +ompt_work_t WorkType; \ +ompt_scope_endpoint_t Endpoint; \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +uint64_t Count; \ +const void *CodeptrRA; \ +) +event_class_w_custom_body(Dispatch, \ + Dispatch(ompt_data_t *ParallelData, ompt_data_t *TaskData, \ + ompt_dispatch_t Kind, ompt_data_t Instance) \ + : InternalEvent(EventTy::Dispatch), ParallelData(ParallelData), \ + TaskData(TaskData), Kind(Kind), Instance(Instance) {} \ + \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +ompt_dispatch_t Kind; \ +ompt_data_t Instance; \ +) +event_class_w_custom_body(TaskCreate, \ + TaskCreate(ompt_data_t *EncounteringTaskData, \ + const ompt_frame_t *EncounteringTaskFrame, \ + ompt_data_t *NewTaskData, int Flags, int HasDependences, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::TaskCreate), \ + EncounteringTaskData(EncounteringTaskData), \ + EncounteringTaskFrame(EncounteringTaskFrame), NewTaskData(NewTaskData), \ + Flags(Flags), HasDependences(HasDependences), CodeptrRA(CodeptrRA) {} \ + \ +ompt_data_t *EncounteringTaskData; \ +const ompt_frame_t *EncounteringTaskFrame; \ +ompt_data_t *NewTaskData; \ +int Flags; \ +int HasDependences; \ +const void *CodeptrRA; \ +) +event_class_stub(Dependences) +event_class_stub(TaskDependence) +event_class_stub(TaskSchedule) +event_class_w_custom_body(ImplicitTask, \ + ImplicitTask(ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, \ + ompt_data_t *TaskData, unsigned int ActualParallelism, \ + unsigned int Index, int Flags) \ + : InternalEvent(EventTy::ImplicitTask), Endpoint(Endpoint), \ + ParallelData(ParallelData), TaskData(TaskData), \ + ActualParallelism(ActualParallelism), Index(Index), Flags(Flags) 
{} \ + \ +ompt_scope_endpoint_t Endpoint; \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +unsigned int ActualParallelism; \ +unsigned int Index; \ +int Flags; \ +) +event_class_stub(Masked) +event_class_w_custom_body(SyncRegion, \ + SyncRegion(ompt_sync_region_t Kind, ompt_scope_endpoint_t Endpoint, \ + ompt_data_t *ParallelData, ompt_data_t *TaskData, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::SyncRegion), Kind(Kind), Endpoint(Endpoint), \ + ParallelData(ParallelData), TaskData(TaskData), CodeptrRA(CodeptrRA) {} \ + \ +ompt_sync_region_t Kind; \ +ompt_scope_endpoint_t Endpoint; \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +const void *CodeptrRA; \ +) +event_class_stub(MutexAcquire) +event_class_stub(Mutex) +event_class_stub(NestLock) +event_class_stub(Flush) +event_class_stub(Cancel) +event_class_w_custom_body(Target, \ + Target(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, int DeviceNum, \ + ompt_data_t *TaskData, ompt_id_t TargetId, const void *CodeptrRA) \ + : InternalEvent(EventTy::Target), Kind(Kind), Endpoint(Endpoint), \ + DeviceNum(DeviceNum), TaskData(TaskData), TargetId(TargetId), \ + CodeptrRA(CodeptrRA) {} \ + \ + ompt_target_t Kind; \ + ompt_scope_endpoint_t Endpoint; \ + int DeviceNum; \ + ompt_data_t *TaskData; \ + ompt_id_t TargetId; \ + const void *CodeptrRA; \ +) +event_class_w_custom_body(TargetEmi, \ + TargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, int DeviceNum, \ + ompt_data_t *TaskData, ompt_data_t *TargetTaskData, \ + ompt_data_t *TargetData, const void *CodeptrRA) \ + : InternalEvent(EventTy::TargetEmi), Kind(Kind), Endpoint(Endpoint), \ + DeviceNum(DeviceNum), TaskData(TaskData), \ + TargetTaskData(TargetTaskData), TargetData(TargetData), \ + CodeptrRA(CodeptrRA) {} \ + \ + ompt_target_t Kind; \ + ompt_scope_endpoint_t Endpoint; \ + int DeviceNum; \ + ompt_data_t *TaskData; \ + ompt_data_t *TargetTaskData; \ + ompt_data_t *TargetData; \ + const void *CodeptrRA; \ +) 
+event_class_w_custom_body(TargetDataOp, \ + TargetDataOp(ompt_id_t TargetId, ompt_id_t HostOpId, \ + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, \ + void *DstAddr, int DstDeviceNum, size_t Bytes, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::TargetDataOp), TargetId(TargetId), \ + HostOpId(HostOpId), OpType(OpType), SrcAddr(SrcAddr), \ + SrcDeviceNum(SrcDeviceNum), DstAddr(DstAddr), \ + DstDeviceNum(DstDeviceNum), Bytes(Bytes), CodeptrRA(CodeptrRA) {} \ + \ + ompt_id_t TargetId; \ + ompt_id_t HostOpId; \ + ompt_target_data_op_t OpType; \ + void *SrcAddr; \ + int SrcDeviceNum; \ + void *DstAddr; \ + int DstDeviceNum; \ + size_t Bytes; \ + const void *CodeptrRA; \ +) +event_class_w_custom_body(TargetDataOpEmi, \ + TargetDataOpEmi(ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetTaskData, \ + ompt_data_t *TargetData, ompt_id_t *HostOpId, \ + ompt_target_data_op_t OpType, void *SrcAddr, \ + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, \ + size_t Bytes, const void *CodeptrRA) \ + : InternalEvent(EventTy::TargetDataOpEmi), Endpoint(Endpoint), \ + TargetTaskData(TargetTaskData), TargetData(TargetData), \ + HostOpId(HostOpId), OpType(OpType), SrcAddr(SrcAddr), \ + SrcDeviceNum(SrcDeviceNum), DstAddr(DstAddr), \ + DstDeviceNum(DstDeviceNum), Bytes(Bytes), CodeptrRA(CodeptrRA) {} \ + \ + ompt_scope_endpoint_t Endpoint; \ + ompt_data_t *TargetTaskData; \ + ompt_data_t *TargetData; \ + ompt_id_t *HostOpId; \ + ompt_target_data_op_t OpType; \ + void *SrcAddr; \ + int SrcDeviceNum; \ + void *DstAddr; \ + int DstDeviceNum; \ + size_t Bytes; \ + const void *CodeptrRA; \ +) +event_class_w_custom_body(TargetSubmit, \ + TargetSubmit(ompt_id_t TargetId, ompt_id_t HostOpId, \ + unsigned int RequestedNumTeams) \ + : InternalEvent(EventTy::TargetSubmit), TargetId(TargetId), \ + HostOpId(HostOpId), RequestedNumTeams(RequestedNumTeams) {} \ + \ + ompt_id_t TargetId; \ + ompt_id_t HostOpId; \ + unsigned int RequestedNumTeams; \ +) 
+event_class_w_custom_body(TargetSubmitEmi, \ + TargetSubmitEmi(ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetData, \ + ompt_id_t *HostOpId, unsigned int RequestedNumTeams) \ + : InternalEvent(EventTy::TargetSubmitEmi), Endpoint(Endpoint), \ + TargetData(TargetData), HostOpId(HostOpId), \ + RequestedNumTeams(RequestedNumTeams) {} \ + \ + ompt_scope_endpoint_t Endpoint; \ + ompt_data_t *TargetData; \ + ompt_id_t *HostOpId; \ + unsigned int RequestedNumTeams; \ +) +event_class_stub(ControlTool) +event_class_w_custom_body(DeviceInitialize, \ + DeviceInitialize(int DeviceNum, const char *Type, ompt_device_t *Device, \ + ompt_function_lookup_t LookupFn, const char *DocStr) \ + : InternalEvent(EventTy::DeviceInitialize), DeviceNum(DeviceNum), \ + Type(Type), Device(Device), LookupFn(LookupFn), DocStr(DocStr) {} \ + \ + int DeviceNum; \ + const char *Type; \ + ompt_device_t *Device; \ + ompt_function_lookup_t LookupFn; \ + const char *DocStr; \ +) +event_class_w_custom_body(DeviceFinalize, \ + DeviceFinalize(int DeviceNum) \ + : InternalEvent(EventTy::DeviceFinalize), DeviceNum(DeviceNum) {} \ + \ + int DeviceNum; \ +) +event_class_w_custom_body(DeviceLoad, \ + DeviceLoad(int DeviceNum, const char *Filename, int64_t OffsetInFile, \ + void *VmaInFile, size_t Bytes, void *HostAddr, void *DeviceAddr, \ + uint64_t ModuleId) \ + : InternalEvent(EventTy::DeviceLoad), DeviceNum(DeviceNum), \ + Filename(Filename), OffsetInFile(OffsetInFile), VmaInFile(VmaInFile), \ + Bytes(Bytes), HostAddr(HostAddr), DeviceAddr(DeviceAddr), \ + ModuleId(ModuleId) {} \ + \ + int DeviceNum; \ + const char *Filename; \ + int64_t OffsetInFile; \ + void *VmaInFile; \ + size_t Bytes; \ + void *HostAddr; \ + void *DeviceAddr; \ + uint64_t ModuleId; \ +) +event_class_stub(DeviceUnload) +event_class_w_custom_body(BufferRequest, \ + BufferRequest(int DeviceNum, ompt_buffer_t **Buffer, size_t *Bytes) \ + : InternalEvent(EventTy::BufferRequest), DeviceNum(DeviceNum), \ + Buffer(Buffer), Bytes(Bytes) {} 
\ + \ + int DeviceNum; \ + ompt_buffer_t **Buffer; \ + size_t *Bytes; \ +) +event_class_w_custom_body(BufferComplete, \ + BufferComplete(int DeviceNum, ompt_buffer_t *Buffer, size_t Bytes, \ + ompt_buffer_cursor_t Begin, int BufferOwned) \ + : InternalEvent(EventTy::BufferComplete), DeviceNum(DeviceNum), \ + Buffer(Buffer), Bytes(Bytes), Begin(Begin), BufferOwned(BufferOwned) {} \ + \ + int DeviceNum; \ + ompt_buffer_t *Buffer; \ + size_t Bytes; \ + ompt_buffer_cursor_t Begin; \ + int BufferOwned; \ +) +event_class_w_custom_body(BufferRecord, \ + BufferRecord(ompt_record_ompt_t *RecordPtr) \ + : InternalEvent(EventTy::BufferRecord), RecordPtr(RecordPtr) { \ + if (RecordPtr != nullptr) Record = *RecordPtr; \ + else memset(&Record, 0, sizeof(ompt_record_ompt_t)); \ + } \ + \ + ompt_record_ompt_t Record; \ + ompt_record_ompt_t *RecordPtr; \ +) +event_class_w_custom_body(BufferRecordDeallocation, \ + BufferRecordDeallocation(ompt_buffer_t *Buffer) \ + : InternalEvent(EventTy::BufferRecordDeallocation), Buffer(Buffer) {} \ + \ + ompt_buffer_t *Buffer; \ +) +// clang-format on + +} // namespace internal + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/InternalEventCommon.h b/openmp/tools/omptest/include/InternalEventCommon.h new file mode 100644 index 0000000000000..e48eeddd975ed --- /dev/null +++ b/openmp/tools/omptest/include/InternalEventCommon.h @@ -0,0 +1,133 @@ +//===- InternalEventCommon.h - Common internal event basics -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides event types, and class/operator declaration macros. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENTCOMMON_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENTCOMMON_H + +#include "omp-tools.h" + +#include +#include + +namespace omptest { + +namespace internal { +/// Enum values are used for comparison of observed and asserted events +/// List is based on OpenMP 5.2 specification, table 19.2 (page 447) +enum class EventTy { + None, // not part of OpenMP spec, used for implementation + AssertionSyncPoint, // not part of OpenMP spec, used for implementation + AssertionSuspend, // not part of OpenMP spec, used for implementation + BufferRecord, // not part of OpenMP spec, used for implementation + BufferRecordDeallocation, // not part of OpenMP spec, used for implementation + ThreadBegin, + ThreadEnd, + ParallelBegin, + ParallelEnd, + Work, + Dispatch, + TaskCreate, // TODO: Implement + Dependences, // TODO: Implement + TaskDependence, // TODO: Implement + TaskSchedule, // TODO: Implement + ImplicitTask, // TODO: Implement + Masked, // TODO: Implement + SyncRegion, + MutexAcquire, // TODO: Implement + Mutex, // TODO: Implement + NestLock, // TODO: Implement + Flush, // TODO: Implement + Cancel, // TODO: Implement + DeviceInitialize, + DeviceFinalize, + DeviceLoad, + DeviceUnload, + BufferRequest, + BufferComplete, + TargetDataOp, + TargetDataOpEmi, + Target, + TargetEmi, + TargetSubmit, + TargetSubmitEmi, + ControlTool +}; + +struct InternalEvent { + EventTy Type; + EventTy getType() const { return Type; } + + InternalEvent() : Type(EventTy::None) {} + InternalEvent(EventTy T) : Type(T) {} + virtual ~InternalEvent() = default; + + virtual bool equals(const InternalEvent *o) const { + assert(false && "Base class implementation"); + return false; + }; + + virtual std::string toString() const { + std::string S{"InternalEvent: Type="}; + S.append(std::to_string((uint32_t)Type)); + return S; + } +}; + +#define event_class_stub(EvTy) 
\ + struct EvTy : public InternalEvent { \ + virtual bool equals(const InternalEvent *o) const override; \ + EvTy() : InternalEvent(EventTy::EvTy) {} \ + }; + +#define event_class_w_custom_body(EvTy, ...) \ + struct EvTy : public InternalEvent { \ + virtual bool equals(const InternalEvent *o) const override; \ + std::string toString() const override; \ + __VA_ARGS__ \ + }; + +#define event_class_operator_stub(EvTy) \ + bool operator==(const EvTy &Expected, const EvTy &Observed) { return true; } + +#define event_class_operator_w_body(EvTy, ...) \ + bool operator==(const EvTy &Expected, const EvTy &Observed) { __VA_ARGS__ } + +/// Template "base" for the cast functions generated in the define_cast_func +/// macro +template const To *cast(const InternalEvent *From) { + return nullptr; +} + +/// Generates template specialization of the cast operation for the specified +/// EvTy as the template parameter +#define define_cast_func(EvTy) \ + template <> const EvTy *cast(const InternalEvent *From) { \ + if (From->getType() == EventTy::EvTy) \ + return static_cast(From); \ + return nullptr; \ + } + +/// Auto generate the equals override to cast and dispatch to the specific class +/// operator== +#define class_equals_op(EvTy) \ + bool EvTy::equals(const InternalEvent *o) const { \ + if (const auto O = cast(o)) \ + return *this == *O; \ + return false; \ + } + +} // namespace internal + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/Logging.h b/openmp/tools/omptest/include/Logging.h new file mode 100644 index 0000000000000..0104191b1d15f --- /dev/null +++ b/openmp/tools/omptest/include/Logging.h @@ -0,0 +1,155 @@ +//===- Logging.h - General logging class ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides ompTest-tailored logging, with log-levels and formatting/coloring. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_LOGGING_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_LOGGING_H + +#include "OmptAssertEvent.h" + +#include +#include +#include +#include +#include +#include + +namespace omptest { +namespace logging { + +enum class Level : uint32_t { + // Levels (Note: DEBUG may already be reserved) + DIAGNOSTIC = 10, + INFO = 20, + WARNING = 30, + ERROR = 40, + CRITICAL = 50, + + // Types used for formatting options + Default, + ExpectedEvent, + ObservedEvent, + OffendingEvent, + + // Suppress all prints + SILENT = 0xFFFFFFFF +}; + +enum class FormatOption : uint32_t { + // General options + // Note: BOLD is actually "BRIGHT" -- But it will be perceived as 'bold' font + // It is implicitly switching colors to the 'Light' variant + // Thus, it has -NO EFFECT- when already using a Light* color + NONE = 0, + BOLD = 1, + DIM = 2, + UNDERLINED = 4, + BLINK = 5, + INVERTED = 7, + HIDDEN = 8, + // Foreground colors + COLOR_Default = 39, + COLOR_Black = 30, + COLOR_Red = 31, + COLOR_Green = 32, + COLOR_Yellow = 33, + COLOR_Blue = 34, + COLOR_Magenta = 35, + COLOR_Cyan = 36, + COLOR_LightGray = 37, + COLOR_DarkGray = 90, + COLOR_LightRed = 91, + COLOR_LightGreen = 92, + COLOR_LightYellow = 93, + COLOR_LightBlue = 94, + COLOR_LightMagenta = 95, + COLOR_LightCyan = 96, + COLOR_White = 97, + // Background colors + COLOR_BG_Default = 49, + COLOR_BG_Black = 40, + COLOR_BG_Red = 41, + COLOR_BG_Green = 42, + COLOR_BG_Yellow = 43, + COLOR_BG_Blue = 44, + COLOR_BG_Magenta = 45, + COLOR_BG_Cyan = 46, + COLOR_BG_LightGray = 47, + COLOR_BG_DarkGray = 100, + COLOR_BG_LightRed = 101, + COLOR_BG_LightGreen = 102, + COLOR_BG_LightYellow = 103, + 
COLOR_BG_LightBlue = 104, + COLOR_BG_LightMagenta = 105, + COLOR_BG_LightCyan = 106, + COLOR_BG_White = 107 +}; + +/// Returns a string representation of the given logging level. +const char *to_string(Level LogLevel); + +/// Returns the format options as escaped sequence, for the given logging level +std::string getFormatSequence(Level LogLevel = Level::Default); + +/// Format the given message with the provided option(s) and return it. +/// Here formatting is only concerning control sequences using character +/// which can be obtained using '\e' (on console), '\033' or '\x1B'. +std::string format(const std::string &Message, FormatOption Option); +std::string format(const std::string &Message, std::set Options); + +class Logger { +public: + Logger(Level LogLevel = Level::WARNING, std::ostream &OutStream = std::cerr, + bool FormatOutput = true); + ~Logger(); + + /// Log the given message to the output. + void log(Level LogLevel, const std::string &Message) const; + + /// Log a single event mismatch. + void eventMismatch(const omptest::OmptAssertEvent &OffendingEvent, + const std::string &Message, + Level LogLevel = Level::ERROR) const; + + /// Log an event-pair mismatch. + void eventMismatch(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent, + const std::string &Message, + Level LogLevel = Level::ERROR) const; + + /// Set if output is being formatted (e.g. colored). + void setFormatOutput(bool Enabled); + + /// Return the current (minimum) Logging Level. + Level getLoggingLevel() const; + + /// Set the (minimum) Logging Level. + void setLoggingLevel(Level LogLevel); + +private: + /// The minimum logging level that is considered by the logger instance. + Level LoggingLevel; + + /// The output stream used by the logger instance. + std::ostream &OutStream; + + /// Determine if log messages are formatted using control sequences. 
+ bool FormatOutput; + + /// Mutex to ensure serialized logging + mutable std::mutex LogMutex; +}; + +} // namespace logging +} // namespace omptest + +#endif \ No newline at end of file diff --git a/openmp/tools/omptest/include/OmptAliases.h b/openmp/tools/omptest/include/OmptAliases.h new file mode 100644 index 0000000000000..500be5ef9f749 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAliases.h @@ -0,0 +1,85 @@ +//===- OmptAliases.h - Shorthand aliases for OMPT enum values ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines shorthand aliases for OMPT enum values, providing improved +/// ease-of-use and readability. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H + +#include + +/// Aliases for enum: ompt_scope_endpoint_t +constexpr ompt_scope_endpoint_t BEGIN = ompt_scope_begin; +constexpr ompt_scope_endpoint_t END = ompt_scope_end; +constexpr ompt_scope_endpoint_t BEGINEND = ompt_scope_beginend; + +/// Aliases for enum: ompt_target_t +constexpr ompt_target_t TARGET = ompt_target; +constexpr ompt_target_t ENTER_DATA = ompt_target_enter_data; +constexpr ompt_target_t EXIT_DATA = ompt_target_exit_data; +constexpr ompt_target_t UPDATE = ompt_target_update; +constexpr ompt_target_t TARGET_NOWAIT = ompt_target_nowait; +constexpr ompt_target_t ENTER_DATA_NOWAIT = ompt_target_enter_data_nowait; +constexpr ompt_target_t EXIT_DATA_NOWAIT = ompt_target_exit_data_nowait; +constexpr ompt_target_t UPDATE_NOWAIT = ompt_target_update_nowait; + +/// Aliases for enum: ompt_target_data_op_t +constexpr ompt_target_data_op_t ALLOC = 
ompt_target_data_alloc; +constexpr ompt_target_data_op_t H2D = ompt_target_data_transfer_to_device; +constexpr ompt_target_data_op_t D2H = ompt_target_data_transfer_from_device; +constexpr ompt_target_data_op_t DELETE = ompt_target_data_delete; +constexpr ompt_target_data_op_t ASSOCIATE = ompt_target_data_associate; +constexpr ompt_target_data_op_t DISASSOCIATE = ompt_target_data_disassociate; +constexpr ompt_target_data_op_t ALLOC_ASYNC = ompt_target_data_alloc_async; +constexpr ompt_target_data_op_t H2D_ASYNC = + ompt_target_data_transfer_to_device_async; +constexpr ompt_target_data_op_t D2H_ASYNC = + ompt_target_data_transfer_from_device_async; +constexpr ompt_target_data_op_t DELETE_ASYNC = ompt_target_data_delete_async; + +/// Aliases for enum: ompt_callbacks_t (partial) +constexpr ompt_callbacks_t CB_TARGET = ompt_callback_target; +constexpr ompt_callbacks_t CB_DATAOP = ompt_callback_target_data_op; +constexpr ompt_callbacks_t CB_KERNEL = ompt_callback_target_submit; + +/// Aliases for enum: ompt_work_t +constexpr ompt_work_t WORK_LOOP = ompt_work_loop; +constexpr ompt_work_t WORK_SECT = ompt_work_sections; +constexpr ompt_work_t WORK_EXEC = ompt_work_single_executor; +constexpr ompt_work_t WORK_SINGLE = ompt_work_single_other; +constexpr ompt_work_t WORK_SHARE = ompt_work_workshare; +constexpr ompt_work_t WORK_DIST = ompt_work_distribute; +constexpr ompt_work_t WORK_TASK = ompt_work_taskloop; +constexpr ompt_work_t WORK_SCOPE = ompt_work_scope; +constexpr ompt_work_t WORK_LOOP_STA = ompt_work_loop_static; +constexpr ompt_work_t WORK_LOOP_DYN = ompt_work_loop_dynamic; +constexpr ompt_work_t WORK_LOOP_GUI = ompt_work_loop_guided; +constexpr ompt_work_t WORK_LOOP_OTH = ompt_work_loop_other; + +/// Aliases for enum: ompt_sync_region_t +constexpr ompt_sync_region_t SR_BARRIER = ompt_sync_region_barrier; +constexpr ompt_sync_region_t SR_BARRIER_IMPL = + ompt_sync_region_barrier_implicit; +constexpr ompt_sync_region_t SR_BARRIER_EXPL = + 
ompt_sync_region_barrier_explicit; +constexpr ompt_sync_region_t SR_BARRIER_IMPLEMENTATION = + ompt_sync_region_barrier_implementation; +constexpr ompt_sync_region_t SR_TASKWAIT = ompt_sync_region_taskwait; +constexpr ompt_sync_region_t SR_TASKGROUP = ompt_sync_region_taskgroup; +constexpr ompt_sync_region_t SR_REDUCTION = ompt_sync_region_reduction; +constexpr ompt_sync_region_t SR_BARRIER_IMPL_WORKSHARE = + ompt_sync_region_barrier_implicit_workshare; +constexpr ompt_sync_region_t SR_BARRIER_IMPL_PARALLEL = + ompt_sync_region_barrier_implicit_parallel; +constexpr ompt_sync_region_t SR_BARRIER_TEAMS = ompt_sync_region_barrier_teams; + +#endif diff --git a/openmp/tools/omptest/include/OmptAssertEvent.h b/openmp/tools/omptest/include/OmptAssertEvent.h new file mode 100644 index 0000000000000..87d187c823796 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAssertEvent.h @@ -0,0 +1,377 @@ +//===- OmptAssertEvent.h - Assertion event declarations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains assertion event constructors, for generally all observable events. +/// This includes user-generated events, like synchronization. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H + +#include "InternalEvent.h" +#include "omp-tools.h" + +#include +#include +#include +#include + +namespace omptest { + +enum class ObserveState { generated, always, never }; + +/// Helper function, returning an ObserveState string representation +const char *to_string(ObserveState State); + +/// Assertion event struct, provides statically callable CTORs. 
+struct OmptAssertEvent { + static OmptAssertEvent AssertionSyncPoint(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + const std::string &SyncPointName); + + static OmptAssertEvent AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType); + + static OmptAssertEvent ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads); + + static OmptAssertEvent ParallelEnd( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *EncounteringTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + uint64_t Count = expectedDefault(uint64_t), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_dispatch_t Kind = expectedDefault(ompt_dispatch_t), + ompt_data_t Instance = expectedDefault(ompt_data_t)); + + static OmptAssertEvent + TaskCreate(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t 
*EncounteringTaskData = expectedDefault(ompt_data_t *), + const ompt_frame_t *EncounteringTaskFrame = + expectedDefault(ompt_frame_t *), + ompt_data_t *NewTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + int HasDependences = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent + ImplicitTask(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + unsigned int ActualParallelism = expectedDefault(unsigned int), + unsigned int Index = expectedDefault(unsigned int), + int Flags = expectedDefault(int)); + + static OmptAssertEvent + SyncRegion(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = 
expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_id_t TargetId, + ompt_id_t HostOpId, ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, size_t Bytes, + const void *CodeptrRA); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + size_t Bytes = expectedDefault(size_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOpEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, + ompt_id_t *HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, + int DstDeviceNum, size_t Bytes, const void *CodeptrRA); + + static OmptAssertEvent + TargetDataOpEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + ompt_scope_endpoint_t Endpoint, + size_t Bytes = expectedDefault(size_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = expectedDefault(ompt_data_t *), + ompt_id_t *HostOpId = expectedDefault(ompt_id_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent TargetSubmit(const std::string &Name, + const 
std::string &Group, + const ObserveState &Expected, + ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams); + + static OmptAssertEvent + TargetSubmit(const std::string &Name, const std::string &Group, + const ObserveState &Expected, unsigned int RequestedNumTeams, + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + static OmptAssertEvent + TargetSubmitEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams); + + static OmptAssertEvent + TargetSubmitEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, unsigned int RequestedNumTeams, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData = expectedDefault(ompt_data_t *), + ompt_id_t *HostOpId = expectedDefault(ompt_id_t *)); + + static OmptAssertEvent ControlTool(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent DeviceInitialize( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Type = expectedDefault(const char *), + ompt_device_t *Device = expectedDefault(ompt_device_t *), + ompt_function_lookup_t LookupFn = expectedDefault(ompt_function_lookup_t), + const char *DocumentationStr = expectedDefault(const char *)); + + static OmptAssertEvent DeviceFinalize(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum); + + static OmptAssertEvent + DeviceLoad(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Filename = expectedDefault(const char *), + int64_t OffsetInFile = expectedDefault(int64_t), + void *VmaInFile = expectedDefault(void *), + size_t Bytes = expectedDefault(size_t), + void *HostAddr = expectedDefault(void *), + void 
*DeviceAddr = expectedDefault(void *), + uint64_t ModuleId = expectedDefault(int64_t)); + + static OmptAssertEvent DeviceUnload(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent BufferRequest(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum, ompt_buffer_t **Buffer, + size_t *Bytes); + + static OmptAssertEvent + BufferComplete(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned); + + static OmptAssertEvent BufferRecord(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_record_ompt_t *Record); + + /// Handle type = ompt_record_target_t + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_id_t TaskId = expectedDefault(ompt_id_t), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_data_op + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + std::pair Timeframe, + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_data_op + static OmptAssertEvent BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, 
ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes = expectedDefault(size_t), + ompt_device_time_t MinimumTimeDelta = expectedDefault(ompt_device_time_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_submit + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + std::pair Timeframe, + unsigned int RequestedNumTeams = expectedDefault(unsigned int), + unsigned int GrantedNumTeams = expectedDefault(unsigned int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + /// Handle type = ompt_callback_target_submit + /// Note: This will also act as the simplest default CTOR + static OmptAssertEvent BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_device_time_t MinimumTimeDelta = expectedDefault(ompt_device_time_t), + unsigned int RequestedNumTeams = expectedDefault(unsigned int), + unsigned int GrantedNumTeams = expectedDefault(unsigned int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + static OmptAssertEvent BufferRecordDeallocation(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_buffer_t *Buffer); + + /// Allow move construction (due to std::unique_ptr) + OmptAssertEvent(OmptAssertEvent &&o) = default; + OmptAssertEvent &operator=(OmptAssertEvent &&o) = default; + + /// Get the event's name + std::string getEventName() const; + + /// Get the event's group name + std::string getEventGroup() const; + + /// Get 
the event's expected observation state + ObserveState getEventExpectedState() const; + + /// Return the actual event type enum value + internal::EventTy getEventType() const; + + /// Get a pointer to the internal event + internal::InternalEvent *getEvent() const; + + /// Make events comparable + friend bool operator==(const OmptAssertEvent &A, const OmptAssertEvent &B); + + /// Returns the string representation of the event + std::string toString(bool PrefixEventName = false) const; + +private: + OmptAssertEvent(const std::string &Name, const std::string &Group, + const ObserveState &Expected, internal::InternalEvent *IE); + OmptAssertEvent(const OmptAssertEvent &o) = delete; + + /// Determine the event name. Either it is provided directly or determined + /// from the calling function's name. + static std::string getName(const std::string &Name, + const char *Caller = __builtin_FUNCTION()) { + std::string EName = Name; + if (EName.empty()) + EName.append(Caller).append(" (auto generated)"); + + return EName; + } + + /// Determine the event group name. Either it is provided directly or "default". + static std::string getGroup(const std::string &Group) { + if (Group.empty()) + return "default"; + + return Group; + } + + std::string Name; + std::string Group; + ObserveState ExpectedState; + std::unique_ptr TheEvent; +}; + +/// POD type, which holds the target region id, corresponding to an event group. +struct AssertEventGroup { + AssertEventGroup(uint64_t TargetRegion) : TargetRegion(TargetRegion) {} + uint64_t TargetRegion; +}; + +bool operator==(const OmptAssertEvent &A, const OmptAssertEvent &B); + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/OmptAsserter.h b/openmp/tools/omptest/include/OmptAsserter.h new file mode 100644 index 0000000000000..64cbb5f3642f9 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAsserter.h @@ -0,0 +1,291 @@ +//===- OmptAsserter.h - Asserter-related classes, enums, etc.
---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains all asserter-related class declarations and important enums. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTER_H + +#include "Logging.h" +#include "OmptAssertEvent.h" + +#include +#include +#include +#include +#include +#include + +namespace omptest { + +// Forward declaration. +class OmptEventGroupInterface; + +enum class AssertMode { strict, relaxed }; +enum class AssertState { pass, fail }; + +/// General base class for the subscriber/notification pattern in +/// OmptCallbackHandler. Derived classes need to implement the notify method. +class OmptListener { +public: + virtual ~OmptListener() = default; + + /// Called for each registered OMPT event of the OmptCallbackHandler + virtual void notify(omptest::OmptAssertEvent &&AE) = 0; + + /// Control whether this asserter should be considered 'active'. + void setActive(bool Enabled); + + /// Check if this asserter is considered 'active'. + bool isActive(); + + /// Check if the given event type is in the set of suppressed event types. + bool isSuppressedEventType(omptest::internal::EventTy EvTy); + + /// Remove the given event type from the set of suppressed events. + void permitEvent(omptest::internal::EventTy EvTy); + + /// Add the given event type to the set of suppressed events. + void suppressEvent(omptest::internal::EventTy EvTy); + +private: + bool Active{true}; + + // Add event types to the set of suppressed events by default.
+ std::set SuppressedEvents{ + omptest::internal::EventTy::ThreadBegin, + omptest::internal::EventTy::ThreadEnd, + omptest::internal::EventTy::ParallelBegin, + omptest::internal::EventTy::ParallelEnd, + omptest::internal::EventTy::Work, + omptest::internal::EventTy::Dispatch, + omptest::internal::EventTy::TaskCreate, + omptest::internal::EventTy::Dependences, + omptest::internal::EventTy::TaskDependence, + omptest::internal::EventTy::TaskSchedule, + omptest::internal::EventTy::ImplicitTask, + omptest::internal::EventTy::Masked, + omptest::internal::EventTy::SyncRegion, + omptest::internal::EventTy::MutexAcquire, + omptest::internal::EventTy::Mutex, + omptest::internal::EventTy::NestLock, + omptest::internal::EventTy::Flush, + omptest::internal::EventTy::Cancel}; +}; + +/// Base class for asserting on OMPT events +class OmptAsserter : public OmptListener { +public: + OmptAsserter(); + virtual ~OmptAsserter() = default; + + /// Add an event to the asserter's internal data structure. + virtual void insert(omptest::OmptAssertEvent &&AE); + + /// Called from the CallbackHandler with a corresponding AssertEvent to which + /// callback was handled. + void notify(omptest::OmptAssertEvent &&AE) override; + + /// Implemented in subclasses to implement what should actually be done with + /// the notification. + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) = 0; + + /// Get the number of currently remaining events, with: ObserveState::always. + virtual size_t getRemainingEventCount() = 0; + + /// Get the total number of received, effective notifications. + int getNotificationCount() { return NumNotifications; } + + /// Get the total number of successful assertion checks. + int getSuccessfulAssertionCount() { return NumSuccessfulAsserts; } + + /// Get the asserter's current operation mode, e.g., strict or relaxed. + AssertMode getOperationMode() { return OperationMode; } + + /// Return the asserter's current state.
+ omptest::AssertState getState() { return State; } + + /// Determine and return the asserter's state. + virtual omptest::AssertState checkState(); + + /// Accessor for the event group interface. + std::shared_ptr getEventGroups() const { + return EventGroups; + } + + /// Accessor for the logging instance. + std::shared_ptr getLog() const { return Log; } + + /// Check the observed events' group association. If the event indicates the + /// begin/end of an OpenMP target region, we will create/deprecate the + /// expected event's group. Return true if the expected event group exists + /// (and is active), otherwise: false. Note: BufferRecords may also match with + /// deprecated groups as they may be delivered asynchronously. + bool verifyEventGroups(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent); + + /// Set the asserter's mode of operation w.r.t. assertion. + void setOperationMode(AssertMode Mode); + + /// The total number of effective notifications. For example, if specific + /// notifications are to be ignored, they will not count towards this total. + int NumNotifications{0}; + + /// The number of successful assertion checks. + int NumSuccessfulAsserts{0}; + +protected: + /// The asserter's current state. + omptest::AssertState State{omptest::AssertState::pass}; + + /// Mutex to avoid data races w.r.t. event notifications and/or insertions. + std::mutex AssertMutex; + + /// Pointer to the OmptEventGroupInterface. + std::shared_ptr EventGroups{nullptr}; + + /// Pointer to the logging instance. + std::shared_ptr Log{nullptr}; + + /// Operation mode during assertion / notification.
+ AssertMode OperationMode{AssertMode::strict}; + +private: + /// Mutex for creating/accessing the singleton members + static std::mutex StaticMemberAccessMutex; + + /// Static member to manage the singleton event group interface instance + static std::weak_ptr EventGroupInterfaceInstance; + + /// Static member to manage the singleton logging instance + static std::weak_ptr LoggingInstance; +}; + +/// Class that can assert in a sequenced fashion, i.e., events have to occur in +/// the order they were registered +class OmptSequencedAsserter : public OmptAsserter { +public: + OmptSequencedAsserter() : OmptAsserter(), NextEvent(0) {} + + /// Add the event to the in-sequence set of events that the asserter should + /// check for. + void insert(omptest::OmptAssertEvent &&AE) override; + + /// Implements the asserter's actual logic + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) override; + + size_t getRemainingEventCount() override; + + omptest::AssertState checkState() override; + + bool AssertionSuspended{false}; + +protected: + /// Notification helper function, implementing SyncPoint logic. Returns true + /// in case of consumed event, indicating early exit of notification. + bool consumeSyncPoint(const omptest::OmptAssertEvent &AE); + + /// Notification helper function, implementing excess event notification + /// logic. Returns true when no more events were expected, indicating early + /// exit of notification. + bool checkExcessNotify(const omptest::OmptAssertEvent &AE); + + /// Notification helper function, implementing Suspend logic. Returns true + /// in case of consumed event, indicating early exit of notification. + bool consumeSuspend(); + + /// Notification helper function, implementing regular event notification + /// logic. Returns true when a matching event was encountered, indicating + /// early exit of notification. + bool consumeRegularEvent(const omptest::OmptAssertEvent &AE); + +public: + /// Index of the next, expected event. 
+ size_t NextEvent{0}; + std::vector Events{}; +}; + +/// Class that asserts with set semantics, i.e., unordered +struct OmptEventAsserter : public OmptAsserter { + OmptEventAsserter() : OmptAsserter(), NumEvents(0), Events() {} + + /// Add the event to the set of events that the asserter should check for. + void insert(omptest::OmptAssertEvent &&AE) override; + + /// Implements the asserter's logic + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) override; + + size_t getRemainingEventCount() override; + + omptest::AssertState checkState() override; + + size_t NumEvents{0}; + + /// For now use vector (but do set semantics) + // TODO std::unordered_set? + std::vector Events{}; +}; + +/// Class that reports the occurred events +class OmptEventReporter : public OmptListener { +public: + OmptEventReporter(std::ostream &OutStream = std::cout) + : OutStream(OutStream) {} + + /// Called from the CallbackHandler with a corresponding AssertEvent to which + /// callback was handled. + void notify(omptest::OmptAssertEvent &&AE) override; + +private: + std::ostream &OutStream; +}; + +/// This class provides the members and methods to manage event groups and +/// SyncPoints in conjunction with asserters. Most importantly it maintains a +/// coherent view of active and past events or SyncPoints. +class OmptEventGroupInterface { +public: + OmptEventGroupInterface() = default; + ~OmptEventGroupInterface() = default; + + /// Non-copyable and non-movable + OmptEventGroupInterface(const OmptEventGroupInterface &) = delete; + OmptEventGroupInterface &operator=(const OmptEventGroupInterface &) = delete; + OmptEventGroupInterface(OmptEventGroupInterface &&) = delete; + OmptEventGroupInterface &operator=(OmptEventGroupInterface &&) = delete; + + /// Add given group to the set of active event groups. Effectively connecting + /// the given groupname (expected) with a target region id (observed). 
+ bool addActiveEventGroup(const std::string &GroupName, + omptest::AssertEventGroup Group); + + /// Move given group from the set of active event groups to the set of + /// previously active event groups. + bool deprecateActiveEventGroup(const std::string &GroupName); + + /// Check if given group is currently part of the active event groups. + bool checkActiveEventGroups(const std::string &GroupName, + omptest::AssertEventGroup Group); + + /// Check if given group is currently part of the deprecated event groups. + bool checkDeprecatedEventGroups(const std::string &GroupName, + omptest::AssertEventGroup Group); + +private: + mutable std::mutex GroupMutex; + std::map ActiveEventGroups{}; + std::map DeprecatedEventGroups{}; + std::set EncounteredSyncPoints{}; +}; + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/OmptCallbackHandler.h b/openmp/tools/omptest/include/OmptCallbackHandler.h new file mode 100644 index 0000000000000..40076c386107e --- /dev/null +++ b/openmp/tools/omptest/include/OmptCallbackHandler.h @@ -0,0 +1,165 @@ +//===- OmptCallbackHandler.h - Callback reception and handling --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides the OMPT callback handling declarations. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTCALLBACKHANDLER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTCALLBACKHANDLER_H + +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" + +#include "omp-tools.h" + +#include + +namespace omptest { + +/// Handler class to do whatever is needed to be done when a callback is invoked +/// by the OMP runtime +/// Supports a RecordAndReplay mechanism in which all OMPT events are recorded +/// and then replayed. This is so that a test can assert on, e.g., a device +/// initialize event, even though this would occur before a unit test is +/// actually executed. +class OmptCallbackHandler { +public: + ~OmptCallbackHandler() = default; + + /// Singleton handler + static OmptCallbackHandler &get(); + + /// Subscribe a listener to be notified for OMPT events + void subscribe(OmptListener *Listener); + + /// Remove all subscribers + void clearSubscribers(); + + /// When the record and replay mechanism is enabled this replays all OMPT + /// events + void replay(); + + /// Special asserter callback which checks that upon encountering the + /// synchronization point, all expected events have been processed. That is: + /// there are currently no remaining expected events for any asserter. 
+ void handleAssertionSyncPoint(const std::string &SyncPointName); + + void handleThreadBegin(ompt_thread_t ThreadType, ompt_data_t *ThreadData); + + void handleThreadEnd(ompt_data_t *ThreadData); + + void handleTaskCreate(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *NewTaskData, int Flags, int HasDependences, + const void *CodeptrRA); + + void handleTaskSchedule(ompt_data_t *PriorTaskData, + ompt_task_status_t PriorTaskStatus, + ompt_data_t *NextTaskData); + + void handleImplicitTask(ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + unsigned int ActualParallelism, unsigned int Index, + int Flags); + + void handleParallelBegin(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *ParallelData, + unsigned int RequestedParallelism, int Flags, + const void *CodeptrRA); + + void handleParallelEnd(ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, int Flags, + const void *CodeptrRA); + + void handleDeviceInitialize(int DeviceNum, const char *Type, + ompt_device_t *Device, + ompt_function_lookup_t LookupFn, + const char *DocumentationStr); + + void handleDeviceFinalize(int DeviceNum); + + void handleTarget(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, ompt_id_t TargetId, + const void *CodeptrRA); + + void handleTargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, + const void *CodeptrRA); + + void handleTargetSubmit(ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams); + + void handleTargetSubmitEmi(ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams); + + void handleTargetDataOp(ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int 
DstDeviceNum, + size_t Bytes, const void *CodeptrRA); + + void handleTargetDataOpEmi(ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA); + + void handleDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, uint64_t ModuleId); + + void handleDeviceUnload(int DeviceNum, uint64_t ModuleId); + + void handleBufferRequest(int DeviceNum, ompt_buffer_t **Buffer, + size_t *Bytes); + + void handleBufferComplete(int DeviceNum, ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned); + + void handleBufferRecord(ompt_record_ompt_t *Record); + + void handleBufferRecordDeallocation(ompt_buffer_t *Buffer); + + /// Not needed for a conforming minimal OMPT implementation + void handleWork(ompt_work_t WorkType, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + uint64_t Count, const void *CodeptrRA); + + void handleDispatch(ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance); + + void handleSyncRegion(ompt_sync_region_t Kind, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + const void *CodeptrRA); + +private: + /// Wrapper around emplace_back for potential additional logging / checking or + /// so + void recordEvent(OmptAssertEvent &&Event); + + /// Listeners to be notified + std::vector Subscribers; + + /// Toggle if OMPT events should notify subscribers immediately or not + bool RecordAndReplay{false}; + + /// Recorded events in Record and Replay mode + std::vector RecordedEvents; +}; + +} // namespace omptest + +// Pointer to global callback handler +extern omptest::OmptCallbackHandler *Handler; + +#endif diff --git a/openmp/tools/omptest/include/OmptTester.h 
b/openmp/tools/omptest/include/OmptTester.h new file mode 100644 index 0000000000000..155e61d5f7482 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTester.h @@ -0,0 +1,60 @@ +//===- OmptTester.h - Main header for ompTest usage -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the main header file for usage of the ompTest library. +/// Depending on the build either 'standalone' or GoogleTest headers are +/// included and corresponding main-function macros are defined. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTER_H + +#include "AssertMacros.h" +#include "Logging.h" +#include "OmptAliases.h" +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptCallbackHandler.h" + +#include +#include +#include +#include +#include +#include +#include + +// Standalone header section +#ifdef OPENMP_LIBOMPTEST_BUILD_STANDALONE + +#include "OmptTesterStandalone.h" + +// Define standalone main function (place once at the bottom of a testsuite) +#define OMPTEST_TESTSUITE_MAIN() \ + int main(int argc, char **argv) { \ + Runner R; \ + return R.run(); \ + } + +// GoogleTest header section +#else + +#include "OmptTesterGoogleTest.h" + +// Define GoogleTest main function (place once at the bottom of a testsuite) +#define OMPTEST_TESTSUITE_MAIN() \ + int main(int argc, char **argv) { \ + testing::InitGoogleTest(&argc, argv); \ + return RUN_ALL_TESTS(); \ + } + +#endif + +#endif diff --git a/openmp/tools/omptest/include/OmptTesterGlobals.h b/openmp/tools/omptest/include/OmptTesterGlobals.h new file mode 100644 index 
0000000000000..62f443aed80e0 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterGlobals.h @@ -0,0 +1,36 @@ +//===- OmptTesterGlobals.h - Global function declarations -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains global function declarations, esp. for OMPT symbols. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGLOBALS_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGLOBALS_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version); +int start_trace(ompt_device_t *Device); +int flush_trace(ompt_device_t *Device); +// Function which calls flush_trace(ompt_device_t *) on all traced devices. +int flush_traced_devices(); +int stop_trace(ompt_device_t *Device); +// Function which calls stop_trace(ompt_device_t *) on all traced devices. +int stop_trace_devices(); +void libomptest_global_eventreporter_set_active(bool State); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/openmp/tools/omptest/include/OmptTesterGoogleTest.h b/openmp/tools/omptest/include/OmptTesterGoogleTest.h new file mode 100644 index 0000000000000..51b94bc678f50 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterGoogleTest.h @@ -0,0 +1,86 @@ +//===- OmptTesterGoogleTest.h - GoogleTest header variant -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the GoogleTest-based header variant, defining the +/// actual test classes and their behavior. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGOOGLETEST_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGOOGLETEST_H + +#include "AssertMacros.h" +#include "OmptAliases.h" +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptCallbackHandler.h" +#include "OmptTesterGlobals.h" + +// This will allow us to override the "TEST" macro of gtest +#define GTEST_DONT_DEFINE_TEST 1 +#include "gtest/gtest.h" + +namespace testing { +class GTEST_API_ OmptTestCase : public testing::Test, + public omptest::OmptEventGroupInterface { +public: + std::unique_ptr SequenceAsserter = + std::make_unique(); + std::unique_ptr SetAsserter = + std::make_unique(); + std::unique_ptr EventReporter = + std::make_unique(); + +protected: + void SetUp() override { + omptest::OmptCallbackHandler::get().subscribe(SequenceAsserter.get()); + omptest::OmptCallbackHandler::get().subscribe(SetAsserter.get()); + omptest::OmptCallbackHandler::get().subscribe(EventReporter.get()); + } + + void TearDown() override { + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // Remove subscribers to not be notified of events after test execution. + omptest::OmptCallbackHandler::get().clearSubscribers(); + + // This common testcase must not encounter any failures. 
+ if (SequenceAsserter->checkState() == omptest::AssertState::fail || + SetAsserter->checkState() == omptest::AssertState::fail) + ADD_FAILURE(); + } +}; + +class GTEST_API_ OmptTestCaseXFail : public testing::OmptTestCase { +protected: + void TearDown() override { + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // Remove subscribers to not be notified of events after test execution. + omptest::OmptCallbackHandler::get().clearSubscribers(); + + // This eXpectedly failing testcase has to encounter at least one failure. + if (SequenceAsserter->checkState() == omptest::AssertState::pass && + SetAsserter->checkState() == omptest::AssertState::pass) + ADD_FAILURE(); + } +}; +} // namespace testing + +#define TEST(test_suite_name, test_name) \ + GTEST_TEST_(test_suite_name, test_name, ::testing::OmptTestCase, \ + ::testing::internal::GetTypeId<::testing::OmptTestCase>()) + +#define TEST_XFAIL(test_suite_name, test_name) \ + GTEST_TEST_(test_suite_name, test_name, ::testing::OmptTestCaseXFail, \ + ::testing::internal::GetTypeId<::testing::OmptTestCaseXFail>()) + +#endif // include guard diff --git a/openmp/tools/omptest/include/OmptTesterStandalone.h b/openmp/tools/omptest/include/OmptTesterStandalone.h new file mode 100644 index 0000000000000..06649031c5d1c --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterStandalone.h @@ -0,0 +1,123 @@ +//===- OmptTesterStandalone.h - Standalone header variant -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the 'standalone' header variant, defining the actual +/// test classes and their behavior (it does not have external dependencies). 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERSTANDALONE_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERSTANDALONE_H + +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptTesterGlobals.h" + +#include +#include + +// Forward declarations. +namespace omptest { +struct OmptEventAsserter; +class OmptEventReporter; +class OmptSequencedAsserter; +} // namespace omptest + +struct Error { + operator bool() { return Fail; } + bool Fail; +}; + +/// A pretty crude test case abstraction +struct TestCase { + TestCase(const std::string &name) + : IsDisabled(name.rfind("DISABLED_", 0) == 0), Name(name) {} + TestCase(const std::string &name, const omptest::AssertState &expected) + : IsDisabled(name.rfind("DISABLED_", 0) == 0), Name(name), + ExpectedState(expected) {} + virtual ~TestCase() = default; + Error exec(); + virtual void execImpl() { assert(false && "Allocating base class"); } + + bool IsDisabled{false}; + std::string Name; + omptest::AssertState ExpectedState{omptest::AssertState::pass}; + omptest::AssertState ResultState{omptest::AssertState::pass}; + + std::unique_ptr SequenceAsserter = + std::make_unique(); + std::unique_ptr SetAsserter = + std::make_unique(); + std::unique_ptr EventReporter = + std::make_unique(); +}; +/// A pretty crude test suite abstraction +struct TestSuite { + using TestCaseVec = std::vector>; + std::string Name; + TestSuite() = default; + TestSuite(const TestSuite &O) = delete; + TestSuite(TestSuite &&O); + void setup(); + void teardown(); + TestCaseVec::iterator begin(); + TestCaseVec::iterator end(); + TestCaseVec TestCases; +}; +/// Static class used to register all test cases and provide them to the driver +class TestRegistrar { +public: + static TestRegistrar &get(); + static std::vector getTestSuites(); + static void addCaseToSuite(TestCase *TC, std::string TSName); + +private: + TestRegistrar() = default; + 
TestRegistrar(const TestRegistrar &o) = delete; + TestRegistrar operator=(const TestRegistrar &o) = delete; + // Keep tests in order 'of appearance' (top -> bottom), avoid unordered_map + static std::map Tests; +}; +/// Hack to register test cases +struct Registerer { + Registerer(TestCase *TC, const std::string SuiteName); +}; +/// Eventually executes all test suites and cases, should contain logic to skip +/// stuff if needed +struct Runner { + Runner() : TestSuites(TestRegistrar::get().getTestSuites()) {} + int run(); + void reportError(const Error &Err); + void abortOrKeepGoing(); + // Print an execution summary of all testsuites and their corresponding + // testcases. + void printSummary(); + std::vector TestSuites; +}; + +/// MACROS TO DEFINE A TESTSUITE + TESTCASE (like GoogleTest does) +#define XQUOTE(str) QUOTE(str) +#define QUOTE(str) #str + +#define TEST_TEMPLATE(SuiteName, CaseName, ExpectedState) \ + struct SuiteName##_##CaseName : public TestCase { \ + SuiteName##_##CaseName() \ + : TestCase(XQUOTE(CaseName), omptest::AssertState::ExpectedState) {} \ + virtual void execImpl() override; \ + }; \ + static Registerer R_##SuiteName##CaseName(new SuiteName##_##CaseName(), \ + #SuiteName); \ + void SuiteName##_##CaseName::execImpl() + +#define TEST(SuiteName, CaseName) \ + TEST_TEMPLATE(SuiteName, CaseName, /*ExpectedState=*/pass) +#define TEST_XFAIL(SuiteName, CaseName) \ + TEST_TEMPLATE(SuiteName, CaseName, /*ExpectedState=*/fail) + +#endif diff --git a/openmp/tools/omptest/src/InternalEvent.cpp b/openmp/tools/omptest/src/InternalEvent.cpp new file mode 100644 index 0000000000000..87daf5a6a31ba --- /dev/null +++ b/openmp/tools/omptest/src/InternalEvent.cpp @@ -0,0 +1,367 @@ +//===- InternalEvent.cpp - Internal event implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements internal event representation methods and helper functions. +/// +//===----------------------------------------------------------------------===// + +#include "InternalEvent.h" + +#include +#include + +using namespace omptest; +using namespace util; + +std::string util::makeHexString(uint64_t Data, bool IsPointer, size_t MinBytes, + bool ShowHexBase) { + if (Data == 0 && IsPointer) + return "(nil)"; + + thread_local std::ostringstream os; + // Clear the content of the stream + os.str(std::string()); + + // Manually prefixing "0x" will make the use of std::setfill more easy + if (ShowHexBase) + os << "0x"; + + // Default to 32bit (8 hex digits) width, if exceeding 64bit or zero value + size_t NumDigits = (MinBytes > 0 && MinBytes < 9) ? (MinBytes << 1) : 8; + + if (MinBytes > 0) + os << std::setfill('0') << std::setw(NumDigits); + + os << std::hex << Data; + return os.str(); +} + +std::string internal::AssertionSyncPoint::toString() const { + std::string S{"Assertion SyncPoint: '"}; + S.append(Name).append(1, '\''); + return S; +} + +std::string internal::ThreadBegin::toString() const { + std::string S{"OMPT Callback ThreadBegin: "}; + S.append("ThreadType=").append(std::to_string(ThreadType)); + return S; +} + +std::string internal::ThreadEnd::toString() const { + std::string S{"OMPT Callback ThreadEnd"}; + return S; +} + +std::string internal::ParallelBegin::toString() const { + std::string S{"OMPT Callback ParallelBegin: "}; + S.append("NumThreads=").append(std::to_string(NumThreads)); + return S; +} + +std::string internal::ParallelEnd::toString() const { + // TODO: Should we expose more detailed info here? 
+ std::string S{"OMPT Callback ParallelEnd"}; + return S; +} + +std::string internal::Work::toString() const { + std::string S{"OMPT Callback Work: "}; + S.append("work_type=").append(std::to_string(WorkType)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" count=").append(std::to_string(Count)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::Dispatch::toString() const { + std::string S{"OMPT Callback Dispatch: "}; + S.append("parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" kind=").append(std::to_string(Kind)); + // TODO Check what to print for instance in all different cases + if (Kind == ompt_dispatch_iteration) { + S.append(" instance=[it=") + .append(std::to_string(Instance.value)) + .append(1, ']'); + } else if (Kind == ompt_dispatch_section) { + S.append(" instance=[ptr=") + .append(makeHexString((uint64_t)Instance.ptr)) + .append(1, ']'); + } else if ((Kind == ompt_dispatch_ws_loop_chunk || + Kind == ompt_dispatch_taskloop_chunk || + Kind == ompt_dispatch_distribute_chunk) && + Instance.ptr != nullptr) { + auto Chunk = static_cast(Instance.ptr); + S.append(" instance=[chunk=(start=") + .append(std::to_string(Chunk->start)) + .append(", iterations=") + .append(std::to_string(Chunk->iterations)) + .append(")]"); + } + return S; +} + +std::string internal::TaskCreate::toString() const { + std::string S{"OMPT Callback TaskCreate: "}; + S.append("encountering_task_data=") + .append(makeHexString((uint64_t)EncounteringTaskData)); + S.append(" encountering_task_frame=") + .append(makeHexString((uint64_t)EncounteringTaskFrame)); + S.append(" new_task_data=").append(makeHexString((uint64_t)NewTaskData)); + S.append(" 
flags=").append(std::to_string(Flags)); + S.append(" has_dependences=").append(std::to_string(HasDependences)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::ImplicitTask::toString() const { + std::string S{"OMPT Callback ImplicitTask: "}; + S.append("endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" actual_parallelism=").append(std::to_string(ActualParallelism)); + S.append(" index=").append(std::to_string(Index)); + S.append(" flags=").append(std::to_string(Flags)); + return S; +} + +std::string internal::SyncRegion::toString() const { + std::string S{"OMPT Callback SyncRegion: "}; + S.append("kind=").append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::Target::toString() const { + // TODO Should we canonicalize the string prefix (use "OMPT ..." everywhere)? + std::string S{"Callback Target: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" kind=").append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" device_num=").append(std::to_string(DeviceNum)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetEmi::toString() const { + // TODO Should we canonicalize the string prefix (use "OMPT ..." everywhere)? 
+ std::string S{"Callback Target EMI: kind="}; + S.append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" device_num=").append(std::to_string(DeviceNum)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" (") + .append(makeHexString((uint64_t)(TaskData) ? TaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_task_data=") + .append(makeHexString((uint64_t)TargetTaskData)); + S.append(" (") + .append( + makeHexString((uint64_t)(TargetTaskData) ? TargetTaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetDataOp::toString() const { + std::string S{" Callback DataOp: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" host_op_id=").append(std::to_string(HostOpId)); + S.append(" optype=").append(std::to_string(OpType)); + S.append(" src=").append(makeHexString((uint64_t)SrcAddr)); + S.append(" src_device_num=").append(std::to_string(SrcDeviceNum)); + S.append(" dest=").append(makeHexString((uint64_t)DstAddr)); + S.append(" dest_device_num=").append(std::to_string(DstDeviceNum)); + S.append(" bytes=").append(std::to_string(Bytes)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetDataOpEmi::toString() const { + std::string S{" Callback DataOp EMI: endpoint="}; + S.append(std::to_string(Endpoint)); + S.append(" optype=").append(std::to_string(OpType)); + S.append(" target_task_data=") + .append(makeHexString((uint64_t)TargetTaskData)); + S.append(" (") + .append( + makeHexString((uint64_t)(TargetTaskData) ? 
TargetTaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" host_op_id=").append(makeHexString((uint64_t)HostOpId)); + S.append(" (") + .append(makeHexString((uint64_t)(HostOpId) ? (*HostOpId) : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" src=").append(makeHexString((uint64_t)SrcAddr)); + S.append(" src_device_num=").append(std::to_string(SrcDeviceNum)); + S.append(" dest=").append(makeHexString((uint64_t)DstAddr)); + S.append(" dest_device_num=").append(std::to_string(DstDeviceNum)); + S.append(" bytes=").append(std::to_string(Bytes)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetSubmit::toString() const { + std::string S{" Callback Submit: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" host_op_id=").append(std::to_string(HostOpId)); + S.append(" req_num_teams=").append(std::to_string(RequestedNumTeams)); + return S; +} + +std::string internal::TargetSubmitEmi::toString() const { + std::string S{" Callback Submit EMI: endpoint="}; + S.append(std::to_string(Endpoint)); + S.append(" req_num_teams=").append(std::to_string(RequestedNumTeams)); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" host_op_id=").append(makeHexString((uint64_t)HostOpId)); + S.append(" (") + .append(makeHexString((uint64_t)(HostOpId) ? (*HostOpId) : 0, + /*IsPointer=*/false)) + .append(1, ')'); + return S; +} + +std::string internal::DeviceInitialize::toString() const { + std::string S{"Callback Init: device_num="}; + S.append(std::to_string(DeviceNum)); + S.append(" type=").append((Type) ? 
Type : "(null)"); + S.append(" device=").append(makeHexString((uint64_t)Device)); + S.append(" lookup=").append(makeHexString((uint64_t)LookupFn)); + S.append(" doc=").append(makeHexString((uint64_t)DocStr)); + return S; +} + +std::string internal::DeviceFinalize::toString() const { + std::string S{"Callback Fini: device_num="}; + S.append(std::to_string(DeviceNum)); + return S; +} + +std::string internal::DeviceLoad::toString() const { + std::string S{"Callback Load: device_num:"}; + S.append(std::to_string(DeviceNum)); + S.append(" module_id:").append(std::to_string(ModuleId)); + S.append(" filename:").append((Filename == nullptr) ? "(null)" : Filename); + S.append(" host_adddr:").append(makeHexString((uint64_t)HostAddr)); + S.append(" device_addr:").append(makeHexString((uint64_t)DeviceAddr)); + S.append(" bytes:").append(std::to_string(Bytes)); + return S; +} + +std::string internal::BufferRequest::toString() const { + std::string S{"Allocated "}; + S.append(std::to_string((Bytes != nullptr) ? *Bytes : 0)) + .append(" bytes at "); + S.append(makeHexString((Buffer != nullptr) ? 
(uint64_t)*Buffer : 0)); + S.append(" in buffer request callback"); + return S; +} + +std::string internal::BufferComplete::toString() const { + std::string S{"Executing buffer complete callback: "}; + S.append(std::to_string(DeviceNum)).append(1, ' '); + S.append(makeHexString((uint64_t)Buffer)).append(1, ' '); + S.append(std::to_string(Bytes)).append(1, ' '); + S.append(makeHexString((uint64_t)Begin)).append(1, ' '); + S.append(std::to_string(BufferOwned)); + return S; +} + +std::string internal::BufferRecord::toString() const { + std::string S{""}; + std::string T{""}; + S.append("rec=").append(makeHexString((uint64_t)RecordPtr)); + S.append(" type=").append(std::to_string(Record.type)); + + T.append("time=").append(std::to_string(Record.time)); + T.append(" thread_id=").append(std::to_string(Record.thread_id)); + T.append(" target_id=").append(std::to_string(Record.target_id)); + + switch (Record.type) { + case ompt_callback_target: + case ompt_callback_target_emi: { + // Handle Target Record + ompt_record_target_t TR = Record.record.target; + S.append(" (Target task) ").append(T); + S.append(" kind=").append(std::to_string(TR.kind)); + S.append(" endpoint=").append(std::to_string(TR.endpoint)); + S.append(" device=").append(std::to_string(TR.device_num)); + S.append(" task_id=").append(std::to_string(TR.task_id)); + S.append(" codeptr=").append(makeHexString((uint64_t)TR.codeptr_ra)); + break; + } + case ompt_callback_target_data_op: + case ompt_callback_target_data_op_emi: { + // Handle Target DataOp Record + ompt_record_target_data_op_t TDR = Record.record.target_data_op; + S.append(" (Target data op) ").append(T); + S.append(" host_op_id=").append(std::to_string(TDR.host_op_id)); + S.append(" optype=").append(std::to_string(TDR.optype)); + S.append(" src_addr=").append(makeHexString((uint64_t)TDR.src_addr)); + S.append(" src_device=").append(std::to_string(TDR.src_device_num)); + S.append(" dest_addr=").append(makeHexString((uint64_t)TDR.dest_addr)); + 
S.append(" dest_device=").append(std::to_string(TDR.dest_device_num)); + S.append(" bytes=").append(std::to_string(TDR.bytes)); + S.append(" end_time=").append(std::to_string(TDR.end_time)); + S.append(" duration=").append(std::to_string(TDR.end_time - Record.time)); + S.append(" ns codeptr=").append(makeHexString((uint64_t)TDR.codeptr_ra)); + break; + } + case ompt_callback_target_submit: + case ompt_callback_target_submit_emi: { + // Handle Target Kernel Record + ompt_record_target_kernel_t TKR = Record.record.target_kernel; + S.append(" (Target kernel) ").append(T); + S.append(" host_op_id=").append(std::to_string(TKR.host_op_id)); + S.append(" requested_num_teams=") + .append(std::to_string(TKR.requested_num_teams)); + S.append(" granted_num_teams=") + .append(std::to_string(TKR.granted_num_teams)); + S.append(" end_time=").append(std::to_string(TKR.end_time)); + S.append(" duration=").append(std::to_string(TKR.end_time - Record.time)); + S.append(" ns"); + break; + } + default: + S.append(" (unsupported record type)"); + break; + } + + return S; +} + +std::string internal::BufferRecordDeallocation::toString() const { + std::string S{"Deallocated "}; + S.append(makeHexString((uint64_t)Buffer)); + return S; +} diff --git a/openmp/tools/omptest/src/InternalEventOperators.cpp b/openmp/tools/omptest/src/InternalEventOperators.cpp new file mode 100644 index 0000000000000..49c61a44a7aba --- /dev/null +++ b/openmp/tools/omptest/src/InternalEventOperators.cpp @@ -0,0 +1,366 @@ +//===- InternalEventOperators.cpp - Operator implementations ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the internal event operators, like comparators. 
+/// +//===----------------------------------------------------------------------===// + +#include "InternalEvent.h" + +namespace omptest { + +namespace internal { +// clang-format off +event_class_operator_stub(AssertionSyncPoint) +event_class_operator_stub(AssertionSuspend) +event_class_operator_stub(ThreadBegin) +event_class_operator_stub(ThreadEnd) +event_class_operator_w_body(ParallelBegin, \ + return Expected.NumThreads == Observed.NumThreads; \ +) +event_class_operator_stub(ParallelEnd) +event_class_operator_w_body(Work, \ + bool isSameWorkType = (Expected.WorkType == Observed.WorkType); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameParallelData = \ + (Expected.ParallelData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.ParallelData == Observed.ParallelData); \ + bool isSameTaskData = \ + (Expected.TaskData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.TaskData == Observed.TaskData); \ + bool isSameCount = (Expected.Count == expectedDefault(uint64_t)) ? \ + true : (Expected.Count == Observed.Count); \ + return isSameWorkType && isSameEndpoint && isSameParallelData && \ + isSameTaskData && isSameCount; \ +) +event_class_operator_stub(Dispatch) +event_class_operator_stub(TaskCreate) +event_class_operator_stub(Dependences) +event_class_operator_stub(TaskDependence) +event_class_operator_stub(TaskSchedule) +event_class_operator_w_body(ImplicitTask, \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameActualParallelism = \ + (Expected.ActualParallelism == expectedDefault(unsigned int)) ? \ + true : (Expected.ActualParallelism == Observed.ActualParallelism); \ + bool isSameIndex = (Expected.Index == expectedDefault(unsigned int)) ? 
\ + true : ( Expected.Index == Observed.Index); \ + return isSameEndpoint && isSameActualParallelism && isSameIndex; \ +) +event_class_operator_stub(Masked) +event_class_operator_w_body(SyncRegion, \ + bool isSameKind = (Expected.Kind == Observed.Kind); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameParallelData = \ + (Expected.ParallelData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.ParallelData == Observed.ParallelData); \ + bool isSameTaskData = \ + (Expected.TaskData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.TaskData == Observed.TaskData); \ + return isSameKind && isSameEndpoint && isSameParallelData && isSameTaskData; \ +) +event_class_operator_stub(MutexAcquire) +event_class_operator_stub(Mutex) +event_class_operator_stub(NestLock) +event_class_operator_stub(Flush) +event_class_operator_stub(Cancel) +event_class_operator_w_body(Target, \ + bool isSameKind = (Expected.Kind == Observed.Kind); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + return isSameKind && isSameEndpoint && isSameDeviceNum; \ +) +event_class_operator_w_body(TargetEmi, \ + bool isSameKind = (Expected.Kind == Observed.Kind); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + return isSameKind && isSameEndpoint && isSameDeviceNum; \ +) +event_class_operator_w_body(TargetDataOp, \ + bool isSameOpType = (Expected.OpType == Observed.OpType); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + bool isSameSrcAddr = (Expected.SrcAddr == expectedDefault(void *)) ? 
\ + true : (Expected.SrcAddr == Observed.SrcAddr); \ + bool isSameDstAddr = (Expected.DstAddr == expectedDefault(void *)) ? \ + true : (Expected.DstAddr == Observed.DstAddr); \ + bool isSameSrcDeviceNum = \ + (Expected.SrcDeviceNum == expectedDefault(int)) ? \ + true : (Expected.SrcDeviceNum == Observed.SrcDeviceNum); \ + bool isSameDstDeviceNum = \ + (Expected.DstDeviceNum == expectedDefault(int)) ? \ + true : (Expected.DstDeviceNum == Observed.DstDeviceNum); \ + return isSameOpType && isSameSize && isSameSrcAddr && isSameDstAddr && \ + isSameSrcDeviceNum && isSameDstDeviceNum; \ +) +event_class_operator_w_body(TargetDataOpEmi, \ + bool isSameOpType = (Expected.OpType == Observed.OpType); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + bool isSameSrcAddr = (Expected.SrcAddr == expectedDefault(void *)) ? \ + true : (Expected.SrcAddr == Observed.SrcAddr); \ + bool isSameDstAddr = (Expected.DstAddr == expectedDefault(void *)) ? \ + true : (Expected.DstAddr == Observed.DstAddr); \ + bool isSameSrcDeviceNum = \ + (Expected.SrcDeviceNum == expectedDefault(int)) ? \ + true : (Expected.SrcDeviceNum == Observed.SrcDeviceNum); \ + bool isSameDstDeviceNum = \ + (Expected.DstDeviceNum == expectedDefault(int)) ? 
\ + true : (Expected.DstDeviceNum == Observed.DstDeviceNum); \ + return isSameOpType && isSameEndpoint && isSameSize && isSameSrcAddr && \ + isSameDstAddr && isSameSrcDeviceNum && isSameDstDeviceNum; \ +) +event_class_operator_w_body(TargetSubmit, \ + bool isSameReqNumTeams = \ + (Expected.RequestedNumTeams == Observed.RequestedNumTeams); \ + return isSameReqNumTeams; \ +) +event_class_operator_w_body(TargetSubmitEmi, \ + bool isSameReqNumTeams = \ + (Expected.RequestedNumTeams == Observed.RequestedNumTeams); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + return isSameReqNumTeams && isSameEndpoint; \ +) +event_class_operator_stub(ControlTool) +event_class_operator_w_body(DeviceInitialize, \ + bool isSameDeviceNum = (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameType = (Expected.Type == expectedDefault(const char *)) ? \ + true : \ + ((Expected.Type == Observed.Type) || \ + (strcmp(Expected.Type, Observed.Type) == 0)); \ + bool isSameDevice = \ + (Expected.Device == expectedDefault(ompt_device_t *)) ? \ + true : (Expected.Device == Observed.Device); \ + return isSameDeviceNum && isSameType && isSameDevice; \ +) +event_class_operator_w_body(DeviceFinalize, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + return isSameDeviceNum; +) +event_class_operator_w_body(DeviceLoad, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + return isSameDeviceNum && isSameSize; \ +) +event_class_operator_stub(DeviceUnload) +event_class_operator_w_body(BufferRequest, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t *)) ? 
\ + true : (Expected.Bytes == Observed.Bytes); \ + return isSameDeviceNum && isSameSize; \ +) +event_class_operator_w_body(BufferComplete, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + return isSameDeviceNum && isSameSize; \ +) +event_class_operator_w_body(BufferRecord, \ + bool isSameType = (Expected.Record.type == Observed.Record.type); \ + bool isSameTargetId = \ + (Expected.Record.target_id == expectedDefault(ompt_id_t)) \ + ? true \ + : (Expected.Record.target_id == Observed.Record.target_id); \ + if (!(isSameType && isSameTargetId)) return false; \ + bool isEqual = true; \ + ompt_device_time_t ObservedDurationNs = \ + Observed.Record.record.target_data_op.end_time - Observed.Record.time; \ + switch(Expected.Record.type) { \ + case ompt_callback_target: \ + isEqual &= \ + (Expected.Record.record.target.kind == expectedDefault(ompt_target_t)) \ + ? true \ + : (Expected.Record.record.target.kind == \ + Observed.Record.record.target.kind); \ + isEqual &= \ + (Expected.Record.record.target.endpoint == \ + expectedDefault(ompt_scope_endpoint_t)) \ + ? true \ + : (Expected.Record.record.target.endpoint == \ + Observed.Record.record.target.endpoint); \ + isEqual &= \ + (Expected.Record.record.target.device_num == expectedDefault(int)) \ + ? true \ + : (Expected.Record.record.target.device_num == \ + Observed.Record.record.target.device_num); \ + break; \ + case ompt_callback_target_data_op: \ + isEqual &= \ + (Expected.Record.record.target_data_op.optype == \ + expectedDefault(ompt_target_data_op_t)) \ + ? true \ + : (Expected.Record.record.target_data_op.optype == \ + Observed.Record.record.target_data_op.optype); \ + isEqual &= \ + (Expected.Record.record.target_data_op.bytes == expectedDefault(size_t)) \ + ? 
true \ + : (Expected.Record.record.target_data_op.bytes == \ + Observed.Record.record.target_data_op.bytes); \ + isEqual &= \ + (Expected.Record.record.target_data_op.src_addr == \ + expectedDefault(void *)) \ + ? true \ + : (Expected.Record.record.target_data_op.src_addr == \ + Observed.Record.record.target_data_op.src_addr); \ + isEqual &= \ + (Expected.Record.record.target_data_op.dest_addr == \ + expectedDefault(void *)) \ + ? true \ + : (Expected.Record.record.target_data_op.dest_addr == \ + Observed.Record.record.target_data_op.dest_addr); \ + isEqual &= \ + (Expected.Record.record.target_data_op.src_device_num == \ + expectedDefault(int)) \ + ? true \ + : (Expected.Record.record.target_data_op.src_device_num == \ + Observed.Record.record.target_data_op.src_device_num); \ + isEqual &= \ + (Expected.Record.record.target_data_op.dest_device_num == \ + expectedDefault(int)) \ + ? true \ + : (Expected.Record.record.target_data_op.dest_device_num == \ + Observed.Record.record.target_data_op.dest_device_num); \ + isEqual &= \ + (Expected.Record.record.target_data_op.host_op_id == \ + expectedDefault(ompt_id_t)) \ + ? true \ + : (Expected.Record.record.target_data_op.host_op_id == \ + Observed.Record.record.target_data_op.host_op_id); \ + isEqual &= \ + (Expected.Record.record.target_data_op.codeptr_ra == \ + expectedDefault(void *)) \ + ? 
true \ + : (Expected.Record.record.target_data_op.codeptr_ra == \ + Observed.Record.record.target_data_op.codeptr_ra); \ + if (Expected.Record.record.target_data_op.end_time != \ + expectedDefault(ompt_device_time_t)) { \ + isEqual &= \ + ObservedDurationNs <= Expected.Record.record.target_data_op.end_time; \ + } \ + isEqual &= ObservedDurationNs >= Expected.Record.time; \ + break; \ + case ompt_callback_target_submit: \ + ObservedDurationNs = \ + Observed.Record.record.target_kernel.end_time - Observed.Record.time; \ + isEqual &= \ + (Expected.Record.record.target_kernel.requested_num_teams == \ + expectedDefault(unsigned int)) \ + ? true \ + : (Expected.Record.record.target_kernel.requested_num_teams == \ + Observed.Record.record.target_kernel.requested_num_teams); \ + isEqual &= \ + (Expected.Record.record.target_kernel.granted_num_teams == \ + expectedDefault(unsigned int)) \ + ? true \ + : (Expected.Record.record.target_kernel.granted_num_teams == \ + Observed.Record.record.target_kernel.granted_num_teams); \ + isEqual &= \ + (Expected.Record.record.target_kernel.host_op_id == \ + expectedDefault(ompt_id_t)) \ + ? 
true \ + : (Expected.Record.record.target_kernel.host_op_id == \ + Observed.Record.record.target_kernel.host_op_id); \ + if (Expected.Record.record.target_kernel.end_time != \ + expectedDefault(ompt_device_time_t)) { \ + isEqual &= \ + ObservedDurationNs <= Expected.Record.record.target_kernel.end_time; \ + } \ + isEqual &= ObservedDurationNs >= Expected.Record.time; \ + break; \ + default: \ + assert(false && "Encountered invalid record type"); \ + } \ + return isEqual; \ +) +event_class_operator_stub(BufferRecordDeallocation) + +define_cast_func(AssertionSyncPoint) +define_cast_func(AssertionSuspend) +define_cast_func(ThreadBegin) +define_cast_func(ThreadEnd) +define_cast_func(ParallelBegin) +define_cast_func(ParallelEnd) +define_cast_func(Work) +define_cast_func(Dispatch) +define_cast_func(TaskCreate) +define_cast_func(Dependences) +define_cast_func(TaskDependence) +define_cast_func(TaskSchedule) +define_cast_func(ImplicitTask) +define_cast_func(Masked) +define_cast_func(SyncRegion) +define_cast_func(MutexAcquire) +define_cast_func(Mutex) +define_cast_func(NestLock) +define_cast_func(Flush) +define_cast_func(Cancel) +define_cast_func(Target) +define_cast_func(TargetEmi) +define_cast_func(TargetDataOp) +define_cast_func(TargetDataOpEmi) +define_cast_func(TargetSubmit) +define_cast_func(TargetSubmitEmi) +define_cast_func(ControlTool) +define_cast_func(DeviceInitialize) +define_cast_func(DeviceFinalize) +define_cast_func(DeviceLoad) +define_cast_func(DeviceUnload) +define_cast_func(BufferRequest) +define_cast_func(BufferComplete) +define_cast_func(BufferRecord) +define_cast_func(BufferRecordDeallocation) + +class_equals_op(AssertionSyncPoint) +class_equals_op(AssertionSuspend) +class_equals_op(ThreadBegin) +class_equals_op(ThreadEnd) +class_equals_op(ParallelBegin) +class_equals_op(ParallelEnd) +class_equals_op(Work) +class_equals_op(Dispatch) +class_equals_op(TaskCreate) +class_equals_op(Dependences) +class_equals_op(TaskDependence) +class_equals_op(TaskSchedule) 
+class_equals_op(ImplicitTask) +class_equals_op(Masked) +class_equals_op(SyncRegion) +class_equals_op(MutexAcquire) +class_equals_op(Mutex) +class_equals_op(NestLock) +class_equals_op(Flush) +class_equals_op(Cancel) +class_equals_op(Target) +class_equals_op(TargetEmi) +class_equals_op(TargetDataOp) +class_equals_op(TargetDataOpEmi) +class_equals_op(TargetSubmit) +class_equals_op(TargetSubmitEmi) +class_equals_op(ControlTool) +class_equals_op(DeviceInitialize) +class_equals_op(DeviceFinalize) +class_equals_op(DeviceLoad) +class_equals_op(DeviceUnload) +class_equals_op(BufferRequest) +class_equals_op(BufferComplete) +class_equals_op(BufferRecord) +class_equals_op(BufferRecordDeallocation) +// clang-format on + +} // namespace internal + +} // namespace omptest diff --git a/openmp/tools/omptest/src/Logging.cpp b/openmp/tools/omptest/src/Logging.cpp new file mode 100644 index 0000000000000..28329c74d188d --- /dev/null +++ b/openmp/tools/omptest/src/Logging.cpp @@ -0,0 +1,177 @@ +//===- Logging.cpp - General logging class implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements ompTest-tailored logging. 
+/// +//===----------------------------------------------------------------------===// + +#include "Logging.h" + +using namespace omptest; +using namespace logging; + +Logger::Logger(Level LogLevel, std::ostream &OutStream, bool FormatOutput) + : LoggingLevel(LogLevel), OutStream(OutStream), FormatOutput(FormatOutput) { + // Flush any buffered output + OutStream << std::flush; +} + +Logger::~Logger() { + // Flush any buffered output + OutStream << std::flush; +} + +std::map> AggregatedFormatOptions{ + {Level::DIAGNOSTIC, {FormatOption::COLOR_LightBlue}}, + {Level::INFO, {FormatOption::COLOR_LightGray}}, + {Level::WARNING, {FormatOption::COLOR_LightYellow}}, + {Level::ERROR, {FormatOption::COLOR_Red}}, + {Level::CRITICAL, {FormatOption::COLOR_LightRed}}, + {Level::Default, {FormatOption::NONE}}, + {Level::ExpectedEvent, {FormatOption::BOLD, FormatOption::COLOR_Cyan}}, + {Level::ObservedEvent, {FormatOption::COLOR_Cyan}}, + {Level::OffendingEvent, {FormatOption::COLOR_Yellow}}}; + +const char *logging::to_string(Level LogLevel) { + switch (LogLevel) { + case Level::DIAGNOSTIC: + return "DIAGNOSTIC"; + case Level::INFO: + return "INFO"; + case Level::WARNING: + return "WARNING"; + case Level::ERROR: + return "ERROR"; + case Level::CRITICAL: + return "CRITICAL"; + default: + assert(false && "Requested string representation for unknown LogLevel"); + return "UNKNOWN"; + } +} + +std::string logging::getFormatSequence(Level LogLevel) { + auto Options = AggregatedFormatOptions[LogLevel]; + std::stringstream SS{"\033["}; + SS << "\033["; + if (!Options.empty()) { + for (auto &Option : AggregatedFormatOptions[LogLevel]) + SS << int(Option) << ';'; + SS.seekp(-1, SS.cur); + SS << 'm'; + } else { + // Fallback to NONE / reset formatting + SS << "0m"; + } + return SS.str(); +} + +std::string logging::format(const std::string &Message, FormatOption Option) { + std::stringstream SS{"\033["}; + SS << "\033["; + SS << int(Option) << 'm' << Message << "\033[0m"; + return SS.str(); +} 
+ +std::string logging::format(const std::string &Message, + std::set Options) { + std::stringstream SS{"\033["}; + SS << "\033["; + for (auto &Option : Options) + SS << int(Option) << ';'; + SS.seekp(-1, SS.cur); + SS << 'm' << Message << "\033[0m"; + return SS.str(); +} + +void Logger::log(Level LogLevel, const std::string &Message) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << Message << getFormatSequence() << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message << std::endl; + } +} + +void Logger::eventMismatch(const omptest::OmptAssertEvent &OffendingEvent, + const std::string &Message, Level LogLevel) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << getFormatSequence() + << format(Message, AggregatedFormatOptions[LogLevel]) + << "\n\tOffending event name='" + << format(OffendingEvent.getEventName(), + AggregatedFormatOptions[Level::OffendingEvent]) + << "'\n\tOffending='" + << format(OffendingEvent.toString(), + AggregatedFormatOptions[Level::OffendingEvent]) + << '\'' << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message + << "\n\tOffending event name='" << OffendingEvent.getEventName() + << "'\n\tOffending='" << OffendingEvent.toString() << '\'' + << std::endl; + } +} + +void Logger::eventMismatch(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent, + const std::string &Message, Level LogLevel) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << Message << 
getFormatSequence() + << "\n\tExpected event name='" + << format(ExpectedEvent.getEventName(), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "' observe='" + << format(to_string(ExpectedEvent.getEventExpectedState()), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "'\n\tObserved event name='" + << format(ObservedEvent.getEventName(), + AggregatedFormatOptions[Level::ObservedEvent]) + << "'\n\tExpected='" + << format(ExpectedEvent.toString(), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "'\n\tObserved='" + << format(ObservedEvent.toString(), + AggregatedFormatOptions[Level::ObservedEvent]) + << '\'' << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message + << "\n\tExpected event name='" << ExpectedEvent.getEventName() + << "' observe='" + << to_string(ExpectedEvent.getEventExpectedState()) + << "'\n\tObserved event name='" << ObservedEvent.getEventName() + << "'\n\tExpected='" << ExpectedEvent.toString() + << "'\n\tObserved='" << ObservedEvent.toString() << '\'' + << std::endl; + } +} + +void Logger::setFormatOutput(bool Enabled) { FormatOutput = Enabled; } + +Level Logger::getLoggingLevel() const { return LoggingLevel; } + +void Logger::setLoggingLevel(Level LogLevel) { LoggingLevel = LogLevel; } diff --git a/openmp/tools/omptest/src/OmptAssertEvent.cpp b/openmp/tools/omptest/src/OmptAssertEvent.cpp new file mode 100644 index 0000000000000..b03f267a8c397 --- /dev/null +++ b/openmp/tools/omptest/src/OmptAssertEvent.cpp @@ -0,0 +1,587 @@ +//===- OmptAssertEvent.cpp - Assertion event implementations ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements assertion event CTORs, for generally all observable events. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptAssertEvent.h" +#include + +using namespace omptest; + +const char *omptest::to_string(ObserveState State) { + switch (State) { + case ObserveState::generated: + return "generated"; + case ObserveState::always: + return "always"; + case ObserveState::never: + return "never"; + default: + assert(false && "Requested string representation for unknown ObserveState"); + return "UNKNOWN"; + } +} + +OmptAssertEvent::OmptAssertEvent(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + internal::InternalEvent *IE) + : Name(Name), Group(Group), ExpectedState(Expected), TheEvent(IE) {} + +OmptAssertEvent OmptAssertEvent::AssertionSyncPoint( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, const std::string &SyncPointName) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::AssertionSyncPoint(SyncPointName)); +} + +OmptAssertEvent +OmptAssertEvent::AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::AssertionSuspend()); +} + +OmptAssertEvent OmptAssertEvent::ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ThreadBegin(ThreadType)); +} + +OmptAssertEvent OmptAssertEvent::ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::ThreadEnd()); +} + +OmptAssertEvent 
OmptAssertEvent::ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ParallelBegin(NumThreads)); +} + +OmptAssertEvent OmptAssertEvent::ParallelEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ParallelEnd(ParallelData, + EncounteringTaskData, Flags, + CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::Work(WorkType, Endpoint, ParallelData, + TaskData, Count, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::Dispatch(ParallelData, TaskData, Kind, Instance)); +} + +OmptAssertEvent OmptAssertEvent::TaskCreate( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *NewTaskData, + int Flags, int HasDependences, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent( + EName, EGroup, Expected, + new internal::TaskCreate(EncounteringTaskData, EncounteringTaskFrame, + NewTaskData, Flags, HasDependences, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::TaskSchedule()); +} + +OmptAssertEvent OmptAssertEvent::ImplicitTask( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + unsigned int ActualParallelism, unsigned int Index, int Flags) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ImplicitTask(Endpoint, ParallelData, + TaskData, ActualParallelism, + Index, Flags)); +} + +OmptAssertEvent OmptAssertEvent::SyncRegion( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::SyncRegion(Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_id_t TargetId, + const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::Target(Kind, Endpoint, DeviceNum, + TaskData, TargetId, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::TargetEmi(const std::string &Name, const std::string &Group, + const 
ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetEmi(Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, + TargetData, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOp( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetDataOp( + TargetId, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOp( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, size_t Bytes, + void *SrcAddr, void *DstAddr, int SrcDeviceNum, int DstDeviceNum, + ompt_id_t TargetId, ompt_id_t HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetDataOp( + TargetId, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOpEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetDataOpEmi(Endpoint, TargetTaskData, TargetData, + HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOpEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + ompt_scope_endpoint_t Endpoint, size_t Bytes, void *SrcAddr, void *DstAddr, + int SrcDeviceNum, int DstDeviceNum, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetDataOpEmi(Endpoint, TargetTaskData, TargetData, + HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmit(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_id_t TargetId, + ompt_id_t HostOpId, + unsigned int RequestedNumTeams) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetSubmit(TargetId, HostOpId, RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmit(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + unsigned int RequestedNumTeams, + ompt_id_t TargetId, + ompt_id_t HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetSubmit(TargetId, HostOpId, RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmitEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetSubmitEmi(Endpoint, TargetData, + HostOpId, + RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmitEmi(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + unsigned int RequestedNumTeams, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, + ompt_id_t *HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetSubmitEmi(Endpoint, TargetData, + HostOpId, + RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::ControlTool(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::ControlTool()); +} + +OmptAssertEvent OmptAssertEvent::DeviceInitialize( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, const char *Type, + ompt_device_t *Device, ompt_function_lookup_t LookupFn, + const char *DocumentationStr) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::DeviceInitialize(DeviceNum, Type, Device, + LookupFn, + DocumentationStr)); +} + +OmptAssertEvent OmptAssertEvent::DeviceFinalize(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::DeviceFinalize(DeviceNum)); +} + +OmptAssertEvent +OmptAssertEvent::DeviceLoad(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Filename, int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, void *HostAddr, + void *DeviceAddr, uint64_t ModuleId) { + auto EName = getName(Name); + auto EGroup = 
getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::DeviceLoad(DeviceNum, Filename, OffsetInFile, VmaInFile, + Bytes, HostAddr, DeviceAddr, ModuleId)); +} + +OmptAssertEvent OmptAssertEvent::DeviceUnload(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::DeviceUnload()); +} + +OmptAssertEvent OmptAssertEvent::BufferRequest(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum, + ompt_buffer_t **Buffer, + size_t *Bytes) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRequest(DeviceNum, Buffer, Bytes)); +} + +OmptAssertEvent OmptAssertEvent::BufferComplete( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, ompt_buffer_t *Buffer, + size_t Bytes, ompt_buffer_cursor_t Begin, int BufferOwned) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferComplete(DeviceNum, Buffer, Bytes, + Begin, BufferOwned)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_record_ompt_t *Record) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(Record)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, ompt_id_t TaskId, + ompt_id_t TargetId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + if (Type != ompt_callback_target) + 
assert(false && "CTOR only suited for type: 'ompt_callback_target'"); + + ompt_record_target_t Subrecord{Kind, Endpoint, DeviceNum, + TaskId, TargetId, CodeptrRA}; + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + RecordPtr->time = expectedDefault(ompt_device_time_t); + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + std::pair Timeframe, void *SrcAddr, + void *DstAddr, int SrcDeviceNum, int DstDeviceNum, ompt_id_t TargetId, + ompt_id_t HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + if (Type != ompt_callback_target_data_op) + assert(false && + "CTOR only suited for type: 'ompt_callback_target_data_op'"); + + ompt_record_target_data_op_t Subrecord{ + HostOpId, OpType, SrcAddr, SrcDeviceNum, DstAddr, + DstDeviceNum, Bytes, Timeframe.second, CodeptrRA}; + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + RecordPtr->time = Timeframe.first; + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target_data_op = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + ompt_device_time_t 
MinimumTimeDelta, void *SrcAddr, void *DstAddr, + int SrcDeviceNum, int DstDeviceNum, ompt_id_t TargetId, ompt_id_t HostOpId, + const void *CodeptrRA) { + return BufferRecord(Name, Group, Expected, Type, OpType, Bytes, + {MinimumTimeDelta, expectedDefault(ompt_device_time_t)}, + SrcAddr, DstAddr, SrcDeviceNum, DstDeviceNum, TargetId, + HostOpId, CodeptrRA); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + std::pair Timeframe, + unsigned int RequestedNumTeams, unsigned int GrantedNumTeams, + ompt_id_t TargetId, ompt_id_t HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + bool isDefault = (Timeframe.first == expectedDefault(ompt_device_time_t)); + isDefault &= (Timeframe.second == expectedDefault(ompt_device_time_t)); + isDefault &= (RequestedNumTeams == expectedDefault(unsigned int)); + isDefault &= (GrantedNumTeams == expectedDefault(unsigned int)); + isDefault &= (TargetId == expectedDefault(ompt_id_t)); + isDefault &= (HostOpId == expectedDefault(ompt_id_t)); + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + + // This handles the simplest occurrence of a device tracing record + // We can only check for Type -- since all other properties are set to default + if (isDefault) { + RecordPtr->time = expectedDefault(ompt_device_time_t); + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = expectedDefault(ompt_id_t); + if (Type == ompt_callback_target) { + ompt_record_target_t Subrecord{expectedDefault(ompt_target_t), + expectedDefault(ompt_scope_endpoint_t), + expectedDefault(int), + expectedDefault(ompt_id_t), + expectedDefault(ompt_id_t), + expectedDefault(void *)}; + RecordPtr->record.target = Subrecord; + } + + if (Type == ompt_callback_target_data_op) { + 
ompt_record_target_data_op_t Subrecord{ + expectedDefault(ompt_id_t), expectedDefault(ompt_target_data_op_t), + expectedDefault(void *), expectedDefault(int), + expectedDefault(void *), expectedDefault(int), + expectedDefault(size_t), expectedDefault(ompt_device_time_t), + expectedDefault(void *)}; + RecordPtr->record.target_data_op = Subrecord; + } + + if (Type == ompt_callback_target_submit) { + ompt_record_target_kernel_t Subrecord{ + expectedDefault(ompt_id_t), expectedDefault(unsigned int), + expectedDefault(unsigned int), expectedDefault(ompt_device_time_t)}; + RecordPtr->record.target_kernel = Subrecord; + } + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); + } + + if (Type != ompt_callback_target_submit) + assert(false && "CTOR only suited for type: 'ompt_callback_target_submit'"); + + ompt_record_target_kernel_t Subrecord{HostOpId, RequestedNumTeams, + GrantedNumTeams, Timeframe.second}; + + RecordPtr->time = Timeframe.first; + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target_kernel = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_device_time_t MinimumTimeDelta, unsigned int RequestedNumTeams, + unsigned int GrantedNumTeams, ompt_id_t TargetId, ompt_id_t HostOpId) { + return BufferRecord(Name, Group, Expected, Type, + {MinimumTimeDelta, expectedDefault(ompt_device_time_t)}, + RequestedNumTeams, GrantedNumTeams, TargetId, HostOpId); +} + +OmptAssertEvent OmptAssertEvent::BufferRecordDeallocation( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_buffer_t *Buffer) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new 
internal::BufferRecordDeallocation(Buffer)); +} + +std::string OmptAssertEvent::getEventName() const { return Name; } + +std::string OmptAssertEvent::getEventGroup() const { return Group; } + +ObserveState OmptAssertEvent::getEventExpectedState() const { + return ExpectedState; +} + +internal::EventTy OmptAssertEvent::getEventType() const { + return TheEvent->getType(); +} + +internal::InternalEvent *OmptAssertEvent::getEvent() const { + return TheEvent.get(); +} + +std::string OmptAssertEvent::toString(bool PrefixEventName) const { + std::string S; + if (PrefixEventName) + S.append(getEventName()).append(": "); + S.append((TheEvent == nullptr) ? "OmptAssertEvent" : TheEvent->toString()); + return S; +} + +bool omptest::operator==(const OmptAssertEvent &A, const OmptAssertEvent &B) { + assert(A.TheEvent.get() != nullptr && "A is valid"); + assert(B.TheEvent.get() != nullptr && "B is valid"); + + return A.TheEvent->getType() == B.TheEvent->getType() && + A.TheEvent->equals(B.TheEvent.get()); +} diff --git a/openmp/tools/omptest/src/OmptAsserter.cpp b/openmp/tools/omptest/src/OmptAsserter.cpp new file mode 100644 index 0000000000000..1c2f2dee69e16 --- /dev/null +++ b/openmp/tools/omptest/src/OmptAsserter.cpp @@ -0,0 +1,480 @@ +//===- OmptAsserter.cpp - Asserter-related implementations ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements all asserter-related class methods, like: notifications, handling +/// of groups or determination of the testcase state. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptAsserter.h" +#include "Logging.h" + +#include + +using namespace omptest; +using namespace internal; + +// Initialize static members +std::mutex OmptAsserter::StaticMemberAccessMutex; +std::weak_ptr + OmptAsserter::EventGroupInterfaceInstance; +std::weak_ptr OmptAsserter::LoggingInstance; + +OmptAsserter::OmptAsserter() { + // Protect static members access + std::lock_guard Lock(StaticMemberAccessMutex); + + // Upgrade OmptEventGroupInterface weak_ptr to shared_ptr + { + EventGroups = EventGroupInterfaceInstance.lock(); + if (!EventGroups) { + // Coordinator doesn't exist or was previously destroyed, create a new + // one. + EventGroups = std::make_shared(); + // Store a weak reference to it + EventGroupInterfaceInstance = EventGroups; + } + // EventGroups is now a valid shared_ptr, either to a new or existing + // instance. + } + + // Upgrade logging::Logger weak_ptr to shared_ptr + { + Log = LoggingInstance.lock(); + if (!Log) { + // Coordinator doesn't exist or was previously destroyed, create a new + // one. + Log = std::make_shared(); + // Store a weak reference to it + LoggingInstance = Log; + } + // Log is now a valid shared_ptr, either to a new or existing instance. 
+ } +} + +void OmptListener::setActive(bool Enabled) { Active = Enabled; } + +bool OmptListener::isActive() { return Active; } + +bool OmptListener::isSuppressedEventType(EventTy EvTy) { + return SuppressedEvents.find(EvTy) != SuppressedEvents.end(); +} + +void OmptListener::permitEvent(EventTy EvTy) { SuppressedEvents.erase(EvTy); } + +void OmptListener::suppressEvent(EventTy EvTy) { + SuppressedEvents.insert(EvTy); +} + +void OmptAsserter::insert(OmptAssertEvent &&AE) { + assert(false && "Base class 'insert' has undefined semantics."); +} + +void OmptAsserter::notify(OmptAssertEvent &&AE) { + // Ignore notifications while inactive + if (!isActive() || isSuppressedEventType(AE.getEventType())) + return; + + this->notifyImpl(std::move(AE)); +} + +AssertState OmptAsserter::checkState() { return State; } + +bool OmptAsserter::verifyEventGroups(const OmptAssertEvent &ExpectedEvent, + const OmptAssertEvent &ObservedEvent) { + assert(ExpectedEvent.getEventType() == ObservedEvent.getEventType() && + "Type mismatch: Expected != Observed event type"); + assert(EventGroups && "Missing EventGroups interface"); + + // Ignore all events within "default" group + auto GroupName = ExpectedEvent.getEventGroup(); + + if (GroupName == "default") + return true; + + // Get a pointer to the observed internal event + auto Event = ObservedEvent.getEvent(); + + switch (Event->getType()) { + case EventTy::Target: + if (auto E = static_cast(Event)) { + if (E->Endpoint == ompt_scope_begin) { + // Add new group since we entered a Target Region + EventGroups->addActiveEventGroup(GroupName, + AssertEventGroup{E->TargetId}); + } else if (E->Endpoint == ompt_scope_end) { + // Deprecate group since we return from a Target Region + EventGroups->deprecateActiveEventGroup(GroupName); + } + return true; + } + return false; + case EventTy::TargetEmi: + if (auto E = static_cast(Event)) { + if (E->Endpoint == ompt_scope_begin) { + // Add new group since we entered a Target Region + 
EventGroups->addActiveEventGroup( + GroupName, AssertEventGroup{E->TargetData->value}); + } else if (E->Endpoint == ompt_scope_end) { + // Deprecate group since we return from a Target Region + EventGroups->deprecateActiveEventGroup(GroupName); + } + return true; + } + return false; + case EventTy::TargetDataOp: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups(GroupName, + AssertEventGroup{E->TargetId}); + + return false; + case EventTy::TargetDataOpEmi: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups( + GroupName, AssertEventGroup{E->TargetData->value}); + + return false; + case EventTy::TargetSubmit: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups(GroupName, + AssertEventGroup{E->TargetId}); + + return false; + case EventTy::TargetSubmitEmi: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups( + GroupName, AssertEventGroup{E->TargetData->value}); + + return false; + case EventTy::BufferRecord: + // BufferRecords are delivered asynchronously: also check deprecated groups. 
+ if (auto E = static_cast(Event)) + return (EventGroups->checkActiveEventGroups( + GroupName, AssertEventGroup{E->Record.target_id}) || + EventGroups->checkDeprecatedEventGroups( + GroupName, AssertEventGroup{E->Record.target_id})); + return false; + // Some event types do not need any handling + case EventTy::ThreadBegin: + case EventTy::ThreadEnd: + case EventTy::ParallelBegin: + case EventTy::ParallelEnd: + case EventTy::Work: + case EventTy::Dispatch: + case EventTy::TaskCreate: + case EventTy::Dependences: + case EventTy::TaskDependence: + case EventTy::TaskSchedule: + case EventTy::ImplicitTask: + case EventTy::Masked: + case EventTy::SyncRegion: + case EventTy::MutexAcquire: + case EventTy::Mutex: + case EventTy::NestLock: + case EventTy::Flush: + case EventTy::Cancel: + case EventTy::DeviceInitialize: + case EventTy::DeviceFinalize: + case EventTy::DeviceLoad: + case EventTy::DeviceUnload: + case EventTy::BufferRequest: + case EventTy::BufferComplete: + case EventTy::BufferRecordDeallocation: + return true; + // Some event types must not be encountered + case EventTy::None: + case EventTy::AssertionSyncPoint: + case EventTy::AssertionSuspend: + default: + assert(false && "Encountered invalid event type"); + } + + return true; +} + +void OmptAsserter::setOperationMode(AssertMode Mode) { OperationMode = Mode; } + +void OmptSequencedAsserter::insert(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + Events.emplace_back(std::move(AE)); +} + +void OmptSequencedAsserter::notifyImpl(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + // Ignore notifications while inactive, or for suppressed events + if (Events.empty() || !isActive() || isSuppressedEventType(AE.getEventType())) + return; + + ++NumNotifications; + + // Note: Order of these checks has semantic meaning. + // (1) Synchronization points should fail if there are remaining events, + // otherwise pass. (2) Regular notification while no further events are + // expected: fail. 
(3) Assertion suspension relies on a next expected event + // being available. (4) All other cases are considered 'regular' and match the + // next expected against the observed event. (5+6) Depending on the state / + // mode we signal failure if no other check has done already, or signaled pass + // by early-exit. + if (consumeSyncPoint(AE) || // Handle observed SyncPoint event + checkExcessNotify(AE) || // Check for remaining expected + consumeSuspend() || // Handle requested suspend + consumeRegularEvent(AE) || // Handle regular event + AssertionSuspended || // Ignore fail, if suspended + OperationMode == AssertMode::relaxed) // Ignore fail, if relaxed op-mode + return; + + Log->eventMismatch(Events[NextEvent], AE, + "[OmptSequencedAsserter] The events are not equal"); + State = AssertState::fail; +} + +bool OmptSequencedAsserter::consumeSyncPoint( + const omptest::OmptAssertEvent &AE) { + if (AE.getEventType() == EventTy::AssertionSyncPoint) { + auto NumRemainingEvents = getRemainingEventCount(); + // Upon encountering a SyncPoint, all events should have been processed + if (NumRemainingEvents == 0) + return true; + + Log->eventMismatch( + AE, + "[OmptSequencedAsserter] Encountered SyncPoint while still awaiting " + + std::to_string(NumRemainingEvents) + " events. Asserted " + + std::to_string(NumSuccessfulAsserts) + "/" + + std::to_string(Events.size()) + " events successfully."); + State = AssertState::fail; + return true; + } + + // Nothing to process: continue. + return false; +} + +bool OmptSequencedAsserter::checkExcessNotify( + const omptest::OmptAssertEvent &AE) { + if (NextEvent >= Events.size()) { + // If we are not expecting any more events and passively asserting: return + if (AssertionSuspended) + return true; + + Log->eventMismatch( + AE, "[OmptSequencedAsserter] Too many events to check (" + + std::to_string(NumNotifications) + "). 
Asserted " + + std::to_string(NumSuccessfulAsserts) + "/" + + std::to_string(Events.size()) + " events successfully."); + State = AssertState::fail; + return true; + } + + // Remaining expected events present: continue. + return false; +} + +bool OmptSequencedAsserter::consumeSuspend() { + // On AssertionSuspend -- enter 'passive' assertion. + // Since we may encounter multiple, successive AssertionSuspend events, loop + // until we hit the next non-AssertionSuspend event. + while (Events[NextEvent].getEventType() == EventTy::AssertionSuspend) { + AssertionSuspended = true; + // We just hit the very last event: indicate early exit. + if (++NextEvent >= Events.size()) + return true; + } + + // Continue with remaining notification logic. + return false; +} + +bool OmptSequencedAsserter::consumeRegularEvent( + const omptest::OmptAssertEvent &AE) { + // If we are actively asserting, increment the event counter. + // Otherwise: If passively asserting, we will keep waiting for a match. + auto &E = Events[NextEvent]; + if (E == AE && verifyEventGroups(E, AE)) { + if (E.getEventExpectedState() == ObserveState::always) { + ++NumSuccessfulAsserts; + } else if (E.getEventExpectedState() == ObserveState::never) { + Log->eventMismatch(E, AE, + "[OmptSequencedAsserter] Encountered forbidden event"); + State = AssertState::fail; + } + + // Return to active assertion + if (AssertionSuspended) + AssertionSuspended = false; + + // Match found, increment index and indicate early exit (success). + ++NextEvent; + return true; + } + + // Continue with remaining notification logic. + return false; +} + +size_t OmptSequencedAsserter::getRemainingEventCount() { + return std::count_if(Events.begin(), Events.end(), + [](const omptest::OmptAssertEvent &E) { + return E.getEventExpectedState() == + ObserveState::always; + }) - + NumSuccessfulAsserts; +} + +AssertState OmptSequencedAsserter::checkState() { + // This is called after the testcase executed. 
+ // Once reached the number of successful notifications should be equal to the + // number of expected events. However, there may still be excluded as well as + // special asserter events remaining in the sequence. + for (size_t i = NextEvent; i < Events.size(); ++i) { + auto &E = Events[i]; + if (E.getEventExpectedState() == ObserveState::always) { + State = AssertState::fail; + Log->eventMismatch(E, "[OmptSequencedAsserter] Expected event was not " + "encountered (Remaining events: " + + std::to_string(getRemainingEventCount()) + ")"); + break; + } + } + + return State; +} + +void OmptEventAsserter::insert(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + Events.emplace_back(std::move(AE)); +} + +void OmptEventAsserter::notifyImpl(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + if (Events.empty() || !isActive() || isSuppressedEventType(AE.getEventType())) + return; + + if (NumEvents == 0) + NumEvents = Events.size(); + + ++NumNotifications; + + if (AE.getEventType() == EventTy::AssertionSyncPoint) { + auto NumRemainingEvents = getRemainingEventCount(); + // Upon encountering a SyncPoint, all events should have been processed + if (NumRemainingEvents == 0) + return; + + Log->eventMismatch( + AE, "[OmptEventAsserter] Encountered SyncPoint while still awaiting " + + std::to_string(NumRemainingEvents) + " events. 
Asserted " + + std::to_string(NumSuccessfulAsserts) + " events successfully."); + State = AssertState::fail; + return; + } + + for (size_t i = 0; i < Events.size(); ++i) { + auto &E = Events[i]; + if (E == AE && verifyEventGroups(E, AE)) { + if (E.getEventExpectedState() == ObserveState::always) { + Events.erase(Events.begin() + i); + ++NumSuccessfulAsserts; + } else if (E.getEventExpectedState() == ObserveState::never) { + Log->eventMismatch(E, AE, + "[OmptEventAsserter] Encountered forbidden event"); + State = AssertState::fail; + } + return; + } + } + + if (OperationMode == AssertMode::strict) { + Log->eventMismatch(AE, "[OmptEventAsserter] Too many events to check (" + + std::to_string(NumNotifications) + + "). Asserted " + + std::to_string(NumSuccessfulAsserts) + + " events successfully. (Remaining events: " + + std::to_string(getRemainingEventCount()) + ")"); + State = AssertState::fail; + return; + } +} + +size_t OmptEventAsserter::getRemainingEventCount() { + // size_t EventCount = std::count_if(Events.begin(), Events.end(), [](const + // omptest::OmptAssertEvent &E) { return E.getEventExpectedState() == + // ObserveState::always; }); + return std::count_if( + Events.begin(), Events.end(), [](const omptest::OmptAssertEvent &E) { + return E.getEventExpectedState() == ObserveState::always; + }); +} + +AssertState OmptEventAsserter::checkState() { + // This is called after the testcase executed. 
+ // Once reached no more expected events should be in the queue + for (const auto &E : Events) { + // Check if any of the remaining events were expected to be observed + if (E.getEventExpectedState() == ObserveState::always) { + State = AssertState::fail; + Log->eventMismatch(E, "[OmptEventAsserter] Expected event was not " + "encountered (Remaining events: " + + std::to_string(getRemainingEventCount()) + ")"); + break; + } + } + + return State; +} + +void OmptEventReporter::notify(OmptAssertEvent &&AE) { + if (!isActive() || isSuppressedEventType(AE.getEventType())) + return; + + // Prepare notification, containing the newline to avoid stream interleaving. + auto Notification{AE.toString()}; + Notification.push_back('\n'); + OutStream << Notification; +} + +bool OmptEventGroupInterface::addActiveEventGroup( + const std::string &GroupName, omptest::AssertEventGroup Group) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = ActiveEventGroups.find(GroupName); + if (EventGroup != ActiveEventGroups.end() && + EventGroup->second.TargetRegion == Group.TargetRegion) + return false; + ActiveEventGroups.emplace(GroupName, Group); + return true; +} + +bool OmptEventGroupInterface::deprecateActiveEventGroup( + const std::string &GroupName) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = ActiveEventGroups.find(GroupName); + auto DeprecatedEventGroup = DeprecatedEventGroups.find(GroupName); + if (EventGroup == ActiveEventGroups.end() && + DeprecatedEventGroup != DeprecatedEventGroups.end()) + return false; + DeprecatedEventGroups.emplace(GroupName, EventGroup->second); + ActiveEventGroups.erase(GroupName); + return true; +} + +bool OmptEventGroupInterface::checkActiveEventGroups( + const std::string &GroupName, omptest::AssertEventGroup Group) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = ActiveEventGroups.find(GroupName); + return (EventGroup != ActiveEventGroups.end() && + EventGroup->second.TargetRegion == Group.TargetRegion); +} + +bool 
OmptEventGroupInterface::checkDeprecatedEventGroups( + const std::string &GroupName, omptest::AssertEventGroup Group) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = DeprecatedEventGroups.find(GroupName); + return (EventGroup != DeprecatedEventGroups.end() && + EventGroup->second.TargetRegion == Group.TargetRegion); +} diff --git a/openmp/tools/omptest/src/OmptCallbackHandler.cpp b/openmp/tools/omptest/src/OmptCallbackHandler.cpp new file mode 100644 index 0000000000000..0794a1c27a902 --- /dev/null +++ b/openmp/tools/omptest/src/OmptCallbackHandler.cpp @@ -0,0 +1,445 @@ +//===- OmptCallbackHandler.cpp - OMPT Callback handling impl. ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the OMPT callback handling implementations. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptCallbackHandler.h" + +using namespace omptest; + +OmptCallbackHandler *Handler = nullptr; + +OmptCallbackHandler &OmptCallbackHandler::get() { + if (Handler == nullptr) + Handler = new OmptCallbackHandler(); + + return *Handler; +} + +void OmptCallbackHandler::subscribe(OmptListener *Listener) { + Subscribers.push_back(Listener); +} + +void OmptCallbackHandler::clearSubscribers() { + replay(); + + Subscribers.clear(); +} + +void OmptCallbackHandler::replay() { + if (!RecordAndReplay) + return; + + for (auto &E : RecordedEvents) + for (const auto &S : Subscribers) + S->notify(std::move(E)); +} + +void OmptCallbackHandler::handleThreadBegin(ompt_thread_t ThreadType, + ompt_data_t *ThreadData) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ThreadBegin( + "Thread Begin", "", ObserveState::generated, ThreadType)); + return; + } + + // Initial thread event likely to preceed assertion registration, so skip + if (ThreadType == ompt_thread_initial) + return; + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ThreadBegin( + "Thread Begin", "", ObserveState::generated, ThreadType)); +} + +void OmptCallbackHandler::handleThreadEnd(ompt_data_t *ThreadData) { + if (RecordAndReplay) { + recordEvent( + OmptAssertEvent::ThreadEnd("Thread End", "", ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify( + OmptAssertEvent::ThreadEnd("Thread End", "", ObserveState::generated)); +} + +void OmptCallbackHandler::handleTaskCreate( + ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *NewTaskData, + int Flags, int HasDependences, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TaskCreate( + "Task Create", "", ObserveState::generated, EncounteringTaskData, + EncounteringTaskFrame, NewTaskData, Flags, HasDependences, CodeptrRA)); + return; + } + + for 
(const auto &S : Subscribers) + S->notify(OmptAssertEvent::TaskCreate( + "Task Create", "", ObserveState::generated, EncounteringTaskData, + EncounteringTaskFrame, NewTaskData, Flags, HasDependences, CodeptrRA)); +} + +void OmptCallbackHandler::handleTaskSchedule(ompt_data_t *PriorTaskData, + ompt_task_status_t PriorTaskStatus, + ompt_data_t *NextTaskData) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TaskSchedule("Task Schedule", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TaskSchedule("Task Schedule", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleImplicitTask(ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, + unsigned int ActualParallelism, + unsigned int Index, int Flags) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ImplicitTask( + "Implicit Task", "", ObserveState::generated, Endpoint, ParallelData, + TaskData, ActualParallelism, Index, Flags)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ImplicitTask( + "Implicit Task", "", ObserveState::generated, Endpoint, ParallelData, + TaskData, ActualParallelism, Index, Flags)); +} + +void OmptCallbackHandler::handleParallelBegin( + ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *ParallelData, + unsigned int RequestedParallelism, int Flags, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ParallelBegin( + "Parallel Begin", "", ObserveState::generated, RequestedParallelism)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ParallelBegin( + "Parallel Begin", "", ObserveState::generated, RequestedParallelism)); +} + +void OmptCallbackHandler::handleParallelEnd(ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) { + if (RecordAndReplay) { + 
recordEvent(OmptAssertEvent::ParallelEnd("Parallel End", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ParallelEnd("Parallel End", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleDeviceInitialize( + int DeviceNum, const char *Type, ompt_device_t *Device, + ompt_function_lookup_t LookupFn, const char *DocumentationStr) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceInitialize( + "Device Inititalize", "", ObserveState::generated, DeviceNum, Type, + Device, LookupFn, DocumentationStr)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceInitialize( + "Device Inititalize", "", ObserveState::generated, DeviceNum, Type, + Device, LookupFn, DocumentationStr)); +} + +void OmptCallbackHandler::handleDeviceFinalize(int DeviceNum) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceFinalize( + "Device Finalize", "", ObserveState::generated, DeviceNum)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceFinalize( + "Device Finalize", "", ObserveState::generated, DeviceNum)); +} + +void OmptCallbackHandler::handleTarget(ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_id_t TargetId, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Target("Target", "", ObserveState::generated, + Kind, Endpoint, DeviceNum, TaskData, + TargetId, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Target("Target", "", ObserveState::generated, + Kind, Endpoint, DeviceNum, TaskData, + TargetId, CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetEmi(ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, + const void *CodeptrRA) { + if (RecordAndReplay) { + 
recordEvent(OmptAssertEvent::TargetEmi( + "Target EMI", "", ObserveState::generated, Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, TargetData, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetEmi( + "Target EMI", "", ObserveState::generated, Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, TargetData, CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetSubmit(ompt_id_t TargetId, + ompt_id_t HostOpId, + unsigned int RequestedNumTeams) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetSubmit("Target Submit", "", + ObserveState::generated, TargetId, + HostOpId, RequestedNumTeams)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetSubmit("Target Submit", "", + ObserveState::generated, TargetId, + HostOpId, RequestedNumTeams)); +} + +void OmptCallbackHandler::handleTargetSubmitEmi( + ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetData, + ompt_id_t *HostOpId, unsigned int RequestedNumTeams) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetSubmitEmi( + "Target Submit EMI", "", ObserveState::generated, Endpoint, TargetData, + HostOpId, RequestedNumTeams)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetSubmitEmi( + "Target Submit EMI", "", ObserveState::generated, Endpoint, TargetData, + HostOpId, RequestedNumTeams)); +} + +void OmptCallbackHandler::handleTargetDataOp( + ompt_id_t TargetId, ompt_id_t HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetDataOp( + "Target Data Op", "", ObserveState::generated, TargetId, HostOpId, + OpType, SrcAddr, SrcDeviceNum, DstAddr, DstDeviceNum, Bytes, + CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetDataOp( + "Target Data Op", "", 
ObserveState::generated, TargetId, HostOpId, + OpType, SrcAddr, SrcDeviceNum, DstAddr, DstDeviceNum, Bytes, + CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetDataOpEmi( + ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetDataOpEmi( + "Target Data Op EMI", "", ObserveState::generated, Endpoint, + TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetDataOpEmi( + "Target Data Op EMI", "", ObserveState::generated, Endpoint, + TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +void OmptCallbackHandler::handleDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceLoad( + "Device Load", "", ObserveState::generated, DeviceNum, Filename, + OffsetInFile, VmaInFile, Bytes, HostAddr, DeviceAddr, ModuleId)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceLoad( + "Device Load", "", ObserveState::generated, DeviceNum, Filename, + OffsetInFile, VmaInFile, Bytes, HostAddr, DeviceAddr, ModuleId)); +} + +void OmptCallbackHandler::handleDeviceUnload(int DeviceNum, uint64_t ModuleId) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceUnload("Device Unload", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceUnload("Device Unload", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleBufferRequest(int 
DeviceNum, + ompt_buffer_t **Buffer, + size_t *Bytes) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRequest("Buffer Request", "", + ObserveState::generated, + DeviceNum, Buffer, Bytes)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRequest("Buffer Request", "", + ObserveState::generated, DeviceNum, + Buffer, Bytes)); +} + +void OmptCallbackHandler::handleBufferComplete(int DeviceNum, + ompt_buffer_t *Buffer, + size_t Bytes, + ompt_buffer_cursor_t Begin, + int BufferOwned) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferComplete( + "Buffer Complete", "", ObserveState::generated, DeviceNum, Buffer, + Bytes, Begin, BufferOwned)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferComplete( + "Buffer Complete", "", ObserveState::generated, DeviceNum, Buffer, + Bytes, Begin, BufferOwned)); +} + +void OmptCallbackHandler::handleBufferRecord(ompt_record_ompt_t *Record) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRecord("Buffer Record", "", + ObserveState::generated, Record)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRecord("Buffer Record", "", + ObserveState::generated, Record)); +} + +void OmptCallbackHandler::handleBufferRecordDeallocation( + ompt_buffer_t *Buffer) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRecordDeallocation( + "Buffer Deallocation", "", ObserveState::generated, Buffer)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRecordDeallocation( + "Buffer Deallocation", "", ObserveState::generated, Buffer)); +} + +void OmptCallbackHandler::handleWork(ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Work("Work", "", ObserveState::generated, + WorkType, Endpoint, 
ParallelData, + TaskData, Count, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Work("Work", "", ObserveState::generated, + WorkType, Endpoint, ParallelData, TaskData, + Count, CodeptrRA)); +} + +void OmptCallbackHandler::handleSyncRegion(ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::SyncRegion( + "SyncRegion", "", ObserveState::generated, Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::SyncRegion( + "SyncRegion", "", ObserveState::generated, Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); +} + +void OmptCallbackHandler::handleDispatch(ompt_data_t *ParallelData, + ompt_data_t *TaskData, + ompt_dispatch_t Kind, + ompt_data_t Instance) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Dispatch("Dispatch", "", + ObserveState::generated, ParallelData, + TaskData, Kind, Instance)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Dispatch("Dispatch", "", ObserveState::generated, + ParallelData, TaskData, Kind, + Instance)); +} + +void OmptCallbackHandler::handleAssertionSyncPoint( + const std::string &SyncPointName) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::AssertionSyncPoint( + "Assertion SyncPoint", "", ObserveState::generated, SyncPointName)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::AssertionSyncPoint( + "Assertion SyncPoint", "", ObserveState::generated, SyncPointName)); +} + +void OmptCallbackHandler::recordEvent(OmptAssertEvent &&Event) { + RecordedEvents.emplace_back(std::forward(Event)); +} diff --git a/openmp/tools/omptest/src/OmptTester.cpp b/openmp/tools/omptest/src/OmptTester.cpp new file mode 100644 index 0000000000000..22de91046fbdc --- /dev/null +++ 
b/openmp/tools/omptest/src/OmptTester.cpp @@ -0,0 +1,504 @@ +//===- OmptTester.cpp - ompTest OMPT tool implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the core implementation file for the ompTest library. +/// It provides the actual OMPT tool implementation: registers callbacks, etc. +/// OMPT callbacks are passed to their corresponding handler, which in turn +/// notifies all registered asserters. +/// +//===----------------------------------------------------------------------===// + +#include "OmptTester.h" + +#include +#include +#include +#include + +using namespace omptest; + +// Callback handler, which receives and relays OMPT callbacks +extern OmptCallbackHandler *Handler; + +// EventListener, which will actually print the OMPT events +static OmptEventReporter *EventReporter; + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +#define OMPT_BUFFER_REQUEST_SIZE 256 + +#ifdef OPENMP_LIBOMPTEST_BUILD_STANDALONE +std::map TestRegistrar::Tests; +#endif + +static std::atomic NextOpId{0x8000000000000001}; +static bool UseEMICallbacks = false; +static bool UseTracing = false; +static bool RunAsTestSuite = false; +static bool ColoredLog = false; + +// OMPT entry point handles +static ompt_set_trace_ompt_t ompt_set_trace_ompt = 0; +static ompt_start_trace_t ompt_start_trace = 0; +static ompt_flush_trace_t ompt_flush_trace = 0; +static 
ompt_stop_trace_t ompt_stop_trace = 0; +static ompt_get_record_ompt_t ompt_get_record_ompt = 0; +static ompt_advance_buffer_cursor_t ompt_advance_buffer_cursor = 0; +static ompt_get_record_type_t ompt_get_record_type_fn = 0; + +// OMPT device side tracing: Currently traced devices +typedef std::unordered_set OmptDeviceSetTy; +typedef std::unique_ptr OmptDeviceSetPtrTy; +static OmptDeviceSetPtrTy TracedDevices; + +// OMPT callbacks + +// Trace record callbacks +static void on_ompt_callback_buffer_request(int device_num, + ompt_buffer_t **buffer, + size_t *bytes) { + *bytes = OMPT_BUFFER_REQUEST_SIZE; + *buffer = malloc(*bytes); + OmptCallbackHandler::get().handleBufferRequest(device_num, buffer, bytes); +} + +// Note: This callback must handle a null begin cursor. Currently, +// ompt_get_record_ompt, print_record_ompt, and +// ompt_advance_buffer_cursor handle a null cursor. +static void on_ompt_callback_buffer_complete( + int device_num, ompt_buffer_t *buffer, + size_t bytes, /* bytes returned in this callback */ + ompt_buffer_cursor_t begin, int buffer_owned) { + OmptCallbackHandler::get().handleBufferComplete(device_num, buffer, bytes, + begin, buffer_owned); + + int Status = 1; + ompt_buffer_cursor_t CurrentPos = begin; + while (Status) { + ompt_record_ompt_t *Record = ompt_get_record_ompt(buffer, CurrentPos); + if (ompt_get_record_type_fn(buffer, CurrentPos) != ompt_record_ompt) { + printf("WARNING: received non-ompt type buffer object\n"); + } + // TODO: Sometimes it may happen that the retrieved record may be null?! 
+ // Only handle non-null records + if (Record != nullptr) + OmptCallbackHandler::get().handleBufferRecord(Record); + Status = ompt_advance_buffer_cursor(/*device=*/NULL, buffer, bytes, + CurrentPos, &CurrentPos); + } + if (buffer_owned) { + OmptCallbackHandler::get().handleBufferRecordDeallocation(buffer); + free(buffer); + } +} + +static ompt_set_result_t set_trace_ompt(ompt_device_t *Device) { + if (!ompt_set_trace_ompt) + return ompt_set_error; + + if (UseEMICallbacks) { + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_emi); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_data_op_emi); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_submit_emi); + } else { + ompt_set_trace_ompt(Device, /*enable=*/1, /*etype=*/ompt_callback_target); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_data_op); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_submit); + } + + return ompt_set_always; +} + +/////// HOST-RELATED ////// + +static void on_ompt_callback_thread_begin(ompt_thread_t thread_type, + ompt_data_t *thread_data) { + OmptCallbackHandler::get().handleThreadBegin(thread_type, thread_data); +} + +static void on_ompt_callback_thread_end(ompt_data_t *thread_data) { + OmptCallbackHandler::get().handleThreadEnd(thread_data); +} + +static void on_ompt_callback_parallel_begin( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, + unsigned int requested_parallelism, int flags, const void *codeptr_ra) { + OmptCallbackHandler::get().handleParallelBegin( + encountering_task_data, encountering_task_frame, parallel_data, + requested_parallelism, flags, codeptr_ra); +} + +static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + int flags, const void *codeptr_ra) { + OmptCallbackHandler::get().handleParallelEnd( + parallel_data, 
encountering_task_data, flags, codeptr_ra); +} + +static void +on_ompt_callback_task_create(ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *new_task_data, int flags, + int has_dependences, const void *codeptr_ra) { + OmptCallbackHandler::get().handleTaskCreate( + encountering_task_data, encountering_task_frame, new_task_data, flags, + has_dependences, codeptr_ra); +} + +static void on_ompt_callback_task_schedule(ompt_data_t *prior_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *next_task_data) { + OmptCallbackHandler::get().handleTaskSchedule( + prior_task_data, prior_task_status, next_task_data); +} + +static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int actual_parallelism, + unsigned int index, int flags) { + OmptCallbackHandler::get().handleImplicitTask( + endpoint, parallel_data, task_data, actual_parallelism, index, flags); +} + +// Callbacks as of Table 19.4, which are not considered required for a minimal +// conforming OMPT implementation. 
+static void on_ompt_callback_work(ompt_work_t work_type, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, uint64_t count, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleWork(work_type, endpoint, parallel_data, + task_data, count, codeptr_ra); +} + +static void on_ompt_callback_dispatch(ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_dispatch_t kind, + ompt_data_t instance) { + OmptCallbackHandler::get().handleDispatch(parallel_data, task_data, kind, + instance); +} + +static void on_ompt_callback_sync_region(ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleSyncRegion(kind, endpoint, parallel_data, + task_data, codeptr_ra); +} + +/////// DEVICE-RELATED ////// + +// Synchronous callbacks +static void on_ompt_callback_device_initialize(int device_num, const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation) { + OmptCallbackHandler::get().handleDeviceInitialize(device_num, type, device, + lookup, documentation); + if (!UseTracing) + return; + + if (!lookup) { + printf("Trace collection disabled on device %d\n", device_num); + return; + } + + ompt_set_trace_ompt = (ompt_set_trace_ompt_t)lookup("ompt_set_trace_ompt"); + ompt_start_trace = (ompt_start_trace_t)lookup("ompt_start_trace"); + ompt_flush_trace = (ompt_flush_trace_t)lookup("ompt_flush_trace"); + ompt_stop_trace = (ompt_stop_trace_t)lookup("ompt_stop_trace"); + ompt_get_record_ompt = (ompt_get_record_ompt_t)lookup("ompt_get_record_ompt"); + ompt_advance_buffer_cursor = + (ompt_advance_buffer_cursor_t)lookup("ompt_advance_buffer_cursor"); + + ompt_get_record_type_fn = + (ompt_get_record_type_t)lookup("ompt_get_record_type"); + if (!ompt_get_record_type_fn) { + printf("WARNING: No function ompt_get_record_type found in device " + "callbacks\n"); + } + + static bool 
IsDeviceMapInitialized = false; + if (!IsDeviceMapInitialized) { + TracedDevices = std::make_unique(); + IsDeviceMapInitialized = true; + } + + set_trace_ompt(device); + + // In many scenarios, this will be a good place to start the + // trace. If start_trace is called from the main program before this + // callback is dispatched, the start_trace handle will be null. This + // is because this device_init callback is invoked during the first + // target construct implementation. + + start_trace(device); +} + +static void on_ompt_callback_device_finalize(int device_num) { + OmptCallbackHandler::get().handleDeviceFinalize(device_num); +} + +static void on_ompt_callback_device_load(int device_num, const char *filename, + int64_t offset_in_file, + void *vma_in_file, size_t bytes, + void *host_addr, void *device_addr, + uint64_t module_id) { + OmptCallbackHandler::get().handleDeviceLoad( + device_num, filename, offset_in_file, vma_in_file, bytes, host_addr, + device_addr, module_id); +} + +static void on_ompt_callback_device_unload(int device_num, uint64_t module_id) { + OmptCallbackHandler::get().handleDeviceUnload(device_num, module_id); +} + +static void on_ompt_callback_target_data_op( + ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, + void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, + size_t bytes, const void *codeptr_ra) { + OmptCallbackHandler::get().handleTargetDataOp( + target_id, host_op_id, optype, src_addr, src_device_num, dest_addr, + dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleTarget(kind, endpoint, device_num, task_data, + target_id, codeptr_ra); +} + +static void on_ompt_callback_target_submit(ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams) { + 
OmptCallbackHandler::get().handleTargetSubmit(target_id, host_op_id, + requested_num_teams); +} + +static void on_ompt_callback_target_data_op_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, + ompt_data_t *target_data, ompt_id_t *host_op_id, + ompt_target_data_op_t optype, void *src_addr, int src_device_num, + void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + // Both src and dest must not be null + // However, for omp_target_alloc only the END call holds a value for one of + // the two entries + if (optype != ompt_target_data_alloc) + assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); + if (endpoint == ompt_scope_begin) + *host_op_id = NextOpId.fetch_add(1, std::memory_order_relaxed); + OmptCallbackHandler::get().handleTargetDataOpEmi( + endpoint, target_task_data, target_data, host_op_id, optype, src_addr, + src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target_emi(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_data_t *target_task_data, + ompt_data_t *target_data, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + if (endpoint == ompt_scope_begin) + target_data->value = NextOpId.fetch_add(1, std::memory_order_relaxed); + OmptCallbackHandler::get().handleTargetEmi(kind, endpoint, device_num, + task_data, target_task_data, + target_data, codeptr_ra); +} + +static void on_ompt_callback_target_submit_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_data, + ompt_id_t *host_op_id, unsigned int requested_num_teams) { + OmptCallbackHandler::get().handleTargetSubmitEmi( + endpoint, target_data, host_op_id, requested_num_teams); +} + +static void on_ompt_callback_target_map(ompt_id_t target_id, + unsigned int nitems, void **host_addr, + void **device_addr, size_t *bytes, + unsigned int 
*mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map callback is unimplemented"); +} + +static void on_ompt_callback_target_map_emi(ompt_data_t *target_data, + unsigned int nitems, + void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map emi callback is unimplemented"); +} + +/// Load the value of a given boolean environmental variable. +bool getBoolEnvironmentVariable(const char *VariableName) { + if (VariableName == nullptr) + return false; + if (const char *EnvValue = std::getenv(VariableName)) { + std::string S{EnvValue}; + for (auto &C : S) + C = (char)std::tolower(C); + if (S == "1" || S == "on" || S == "true" || S == "yes") + return true; + } + return false; +} + +/// Called by the OMP runtime to initialize the OMPT +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback_t ompt_set_callback = nullptr; + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + if (!ompt_set_callback) + return 0; // failure + + UseEMICallbacks = getBoolEnvironmentVariable("OMPTEST_USE_OMPT_EMI"); + UseTracing = getBoolEnvironmentVariable("OMPTEST_USE_OMPT_TRACING"); + RunAsTestSuite = getBoolEnvironmentVariable("OMPTEST_RUN_AS_TESTSUITE"); + ColoredLog = getBoolEnvironmentVariable("OMPTEST_LOG_COLORED"); + + register_ompt_callback(ompt_callback_thread_begin); + register_ompt_callback(ompt_callback_thread_end); + register_ompt_callback(ompt_callback_parallel_begin); + register_ompt_callback(ompt_callback_parallel_end); + register_ompt_callback(ompt_callback_work); + // register_ompt_callback(ompt_callback_dispatch); + register_ompt_callback(ompt_callback_task_create); + // register_ompt_callback(ompt_callback_dependences); + // register_ompt_callback(ompt_callback_task_dependence); + register_ompt_callback(ompt_callback_task_schedule); + register_ompt_callback(ompt_callback_implicit_task); + // 
register_ompt_callback(ompt_callback_masked); + register_ompt_callback(ompt_callback_sync_region); + // register_ompt_callback(ompt_callback_mutex_acquire); + // register_ompt_callback(ompt_callback_mutex); + // register_ompt_callback(ompt_callback_nestLock); + // register_ompt_callback(ompt_callback_flush); + // register_ompt_callback(ompt_callback_cancel); + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_device_unload); + + if (UseEMICallbacks) { + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit_emi); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_map_emi); + } else { + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target_map); + } + + // Construct & subscribe the reporter, so it will be notified of events + EventReporter = new OmptEventReporter(); + OmptCallbackHandler::get().subscribe(EventReporter); + + if (RunAsTestSuite) + EventReporter->setActive(false); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) { + assert(Handler && "Callback handler should be present at this point"); + assert(EventReporter && "EventReporter should be present at this point"); + delete Handler; + delete EventReporter; +} + +#ifdef __cplusplus +extern "C" { +#endif +/// Called from the OMP Runtime to start / initialize the tool +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = { + &ompt_initialize, &ompt_finalize, {0}}; + return &ompt_start_tool_result; +} + +int start_trace(ompt_device_t *Device) { + if (!ompt_start_trace) + return 0; 
+ + // This device will be traced + assert(TracedDevices->find(Device) == TracedDevices->end() && + "Device already present in the map"); + TracedDevices->insert(Device); + + return ompt_start_trace(Device, &on_ompt_callback_buffer_request, + &on_ompt_callback_buffer_complete); +} + +int flush_trace(ompt_device_t *Device) { + if (!ompt_flush_trace) + return 0; + return ompt_flush_trace(Device); +} + +int flush_traced_devices() { + if (!ompt_flush_trace || TracedDevices == nullptr) + return 0; + + size_t NumFlushedDevices = 0; + for (auto Device : *TracedDevices) + if (ompt_flush_trace(Device) == 1) + ++NumFlushedDevices; + + // Provide time to process triggered assert events + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + return (NumFlushedDevices == TracedDevices->size()); +} + +int stop_trace(ompt_device_t *Device) { + if (!ompt_stop_trace) + return 0; + + // This device will not be traced anymore + assert(TracedDevices->find(Device) != TracedDevices->end() && + "Device not present in the map"); + TracedDevices->erase(Device); + + return ompt_stop_trace(Device); +} + +// This is primarily used to stop unwanted prints from happening. +void libomptest_global_eventreporter_set_active(bool State) { + assert(EventReporter && "EventReporter should be present at this point"); + EventReporter->setActive(State); +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/tools/omptest/src/OmptTesterStandalone.cpp b/openmp/tools/omptest/src/OmptTesterStandalone.cpp new file mode 100644 index 0000000000000..d4f68b4576536 --- /dev/null +++ b/openmp/tools/omptest/src/OmptTesterStandalone.cpp @@ -0,0 +1,147 @@ +//===- OmptTesterStandalone.cpp - Standalone unit testing impl. -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the 'standalone' ompTest unit testing core +/// implementation, defining the general test suite and test case execution. +/// +//===----------------------------------------------------------------------===// + +#include "OmptTesterStandalone.h" +#include "OmptCallbackHandler.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace omptest; + +Error TestCase::exec() { + Error E; + E.Fail = false; + + if (IsDisabled) + return E; + + OmptCallbackHandler::get().subscribe(SequenceAsserter.get()); + OmptCallbackHandler::get().subscribe(SetAsserter.get()); + OmptCallbackHandler::get().subscribe(EventReporter.get()); + + execImpl(); + + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // We remove subscribers to not be notified of events after our test case + // finished. 
+ OmptCallbackHandler::get().clearSubscribers(); + omptest::AssertState SequenceResultState = SequenceAsserter->checkState(); + omptest::AssertState SetResultState = SetAsserter->checkState(); + bool AnyFail = SequenceResultState == omptest::AssertState::fail || + SetResultState == omptest::AssertState::fail; + bool AllPass = SequenceResultState == omptest::AssertState::pass && + SetResultState == omptest::AssertState::pass; + if (ExpectedState == omptest::AssertState::pass && AnyFail) + E.Fail = true; + else if (ExpectedState == omptest::AssertState::fail && AllPass) + E.Fail = true; + if (AnyFail) + ResultState = omptest::AssertState::fail; + return E; +} + +TestSuite::TestSuite(TestSuite &&O) { + Name = O.Name; + TestCases.swap(O.TestCases); +} + +void TestSuite::setup() {} + +void TestSuite::teardown() {} + +TestSuite::TestCaseVec::iterator TestSuite::begin() { + return TestCases.begin(); +} + +TestSuite::TestCaseVec::iterator TestSuite::end() { return TestCases.end(); } + +TestRegistrar &TestRegistrar::get() { + static TestRegistrar TR; + return TR; +} + +std::vector TestRegistrar::getTestSuites() { + std::vector TSs; + for (auto &[k, v] : Tests) + TSs.emplace_back(std::move(v)); + return TSs; +} + +void TestRegistrar::addCaseToSuite(TestCase *TC, std::string TSName) { + auto &TS = Tests[TSName]; + if (TS.Name.empty()) + TS.Name = TSName; + TS.TestCases.emplace_back(TC); +} + +Registerer::Registerer(TestCase *TC, const std::string SuiteName) { + std::cout << "Adding " << TC->Name << " to " << SuiteName << std::endl; + TestRegistrar::get().addCaseToSuite(TC, SuiteName); +} + +int Runner::run() { + int ErrorCount = 0; + for (auto &TS : TestSuites) { + std::cout << "\n======\nExecuting for " << TS.Name << std::endl; + TS.setup(); + for (auto &TC : TS) { + std::cout << "\nExecuting " << TC->Name << std::endl; + if (Error Err = TC->exec()) { + reportError(Err); + abortOrKeepGoing(); + ++ErrorCount; + } + } + TS.teardown(); + } + printSummary(); + return ErrorCount; 
+} + +void Runner::reportError(const Error &Err) {} + +void Runner::abortOrKeepGoing() {} + +void Runner::printSummary() { + std::cout << "\n====== SUMMARY\n"; + for (auto &TS : TestSuites) { + std::cout << " - " << TS.Name; + for (auto &TC : TS) { + std::string Result; + if (TC->IsDisabled) { + Result = "-#-#-"; + } else if (TC->ResultState == TC->ExpectedState) { + if (TC->ResultState == omptest::AssertState::pass) + Result = "PASS"; + else if (TC->ResultState == omptest::AssertState::fail) + Result = "XFAIL"; + } else { + if (TC->ResultState == omptest::AssertState::fail) + Result = "FAIL"; + else if (TC->ResultState == omptest::AssertState::pass) + Result = "UPASS"; + } + std::cout << "\n " << std::setw(5) << Result << " : " << TC->Name; + } + std::cout << std::endl; + } +} diff --git a/openmp/tools/omptest/test/CMakeLists.txt b/openmp/tools/omptest/test/CMakeLists.txt new file mode 100644 index 0000000000000..427893313cc67 --- /dev/null +++ b/openmp/tools/omptest/test/CMakeLists.txt @@ -0,0 +1,28 @@ +##===----------------------------------------------------------------------===## +# +# Add ompTest unit tests to check-openmp. +# +##===----------------------------------------------------------------------===## + +# Target: ompTest library unit tests +file(GLOB UNITTEST_SOURCES "unittests/*.cpp") +add_executable(omptest-unittests ${UNITTEST_SOURCES}) + +# Add local and LLVM-provided GoogleTest include directories. 
+target_include_directories(omptest-unittests PRIVATE + ../include + ${LLVM_THIRD_PARTY_DIR}/unittest/googletest/include) + +target_link_libraries(omptest-unittests PRIVATE omptest) + +set_target_properties(omptest-unittests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +# Add ompTest unit tests to check-openmp +add_openmp_testsuite(check-ompt-omptest "Running OMPT ompTest unit tests" + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omptest-unittests) + +# Configure the lit.site.cfg.in file +set(AUTO_GEN_COMMENT "## Autogenerated by OPENMP_TOOLS_OMPTEST_TEST " + "configuration.\n# Do not edit!") +configure_file(lit.site.cfg.in lit.site.cfg @ONLY) diff --git a/openmp/tools/omptest/test/lit.cfg b/openmp/tools/omptest/test/lit.cfg new file mode 100644 index 0000000000000..69c401aed83b8 --- /dev/null +++ b/openmp/tools/omptest/test/lit.cfg @@ -0,0 +1,26 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +# name: The name of this test suite. +config.name = 'OMPT ompTest' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = [''] + +# test_source_root: The root path where tests are located. 
+config.test_source_root = config.test_obj_root + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.test_obj_root + +# test format, match (omptest-)unittests +# Matched binaries (GoogleTests) are executed +config.test_format = lit.formats.GoogleTest(".", "unittests") diff --git a/openmp/tools/omptest/test/lit.site.cfg.in b/openmp/tools/omptest/test/lit.site.cfg.in new file mode 100644 index 0000000000000..4fa8c7e349681 --- /dev/null +++ b/openmp/tools/omptest/test/lit.site.cfg.in @@ -0,0 +1,9 @@ +@AUTO_GEN_COMMENT@ + +config.test_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config do the real work. +lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp b/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp new file mode 100644 index 0000000000000..34ceb7597b791 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp @@ -0,0 +1,358 @@ +#include "OmptAliases.h" +#include "OmptAsserter.h" +#include +#include + +#include "gtest/gtest.h" + +using namespace omptest; +using OAE = omptest::OmptAssertEvent; +using OS = omptest::ObserveState; + +/// SequencedAsserter test-fixture class to avoid code duplication among tests. 
+class OmptSequencedAsserterTest : public testing::Test { +protected: + OmptSequencedAsserterTest() { + // Construct default sequenced asserter + SeqAsserter = std::make_unique(); + + // Silence all potential log prints + SeqAsserter->getLog()->setLoggingLevel(logging::Level::SILENT); + } + + std::unique_ptr SeqAsserter; +}; + +TEST_F(OmptSequencedAsserterTest, DefaultState) { + // Assertion should neither start as 'deactivated' nor 'suspended' + ASSERT_EQ(SeqAsserter->isActive(), true); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + + // Assertion should begin with event ID zero + ASSERT_EQ(SeqAsserter->NextEvent, 0); + + // Assertion should begin without previous notifications or assertions + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + + // There should be no expected events + ASSERT_EQ(SeqAsserter->Events.empty(), true); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + + // Default mode should be strict + ASSERT_NE(SeqAsserter->getOperationMode(), AssertMode::relaxed); + ASSERT_EQ(SeqAsserter->getOperationMode(), AssertMode::strict); + + // Default state should be passing + ASSERT_NE(SeqAsserter->getState(), AssertState::fail); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); + ASSERT_NE(SeqAsserter->checkState(), AssertState::fail); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, IgnoreNotificationsWhenEmpty) { + // ParallelBegin events are suppressed by default + auto SuppressedEvent = OAE::ParallelBegin( + /*Name=*/"ParBegin", /*Group=*/"", /*Expected=*/OS::always, + /*NumThreads=*/3); + + // DeviceFinalize events are not ignored by default + auto IgnoredEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + + // Situation: There is nothing to assert. + // Result: All notifications are ignored. 
+ // Hence, check that the perceived count of notifications remains unchanged + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + SeqAsserter->notify(std::move(SuppressedEvent)); + + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + SeqAsserter->notify(std::move(IgnoredEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, IgnoreNotificationsWhileDeactivated) { + auto ExpectedEvent = OAE::DeviceUnload( + /*Name=*/"DevUnload", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Deactivate asserter, effectively ignoring notifications + SeqAsserter->setActive(false); + ASSERT_EQ(SeqAsserter->isActive(), false); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // DeviceFinalize events are not ignored by default + auto IgnoredEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->notify(std::move(IgnoredEvent)); + + // Assertion was deactivated: No change + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + + SeqAsserter->setActive(true); + ASSERT_EQ(SeqAsserter->isActive(), true); + + auto ObservedEvent = OAE::DeviceUnload( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->notify(std::move(ObservedEvent)); + + // Assertion was activated, one notification expected + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEvent) { + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + 
/*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // Sanity check: Notifications should not be triggered + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + // Adding an expected event must change the event count but not the state + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventIgnoreSuppressed) { + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // ParallelBegin events are suppressed by default + auto SuppressedEvent = OAE::ParallelBegin( + /*Name=*/"ParBegin", /*Group=*/"", /*Expected=*/OS::always, + /*NumThreads=*/3); + // Situation: There is one expected event and ParallelBegins are suppressed. + // Notification count remains unchanged for suppressed events + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + SeqAsserter->notify(std::move(SuppressedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObservePass) { + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObserveFail) { + auto ExpectedEvent = 
OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + // Provide wrong DeviceNum + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/23); + + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + // Observed and expected event do not match: Fail + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObserveDifferentType) { + auto ExpectedEvent = OAE::DeviceUnload( + /*Name=*/"DevUnload", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + // Observed and expected event do not match: Fail + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckTargetGroupNoEffect) { + // Situation: Groups are designed to be used as an indicator -WITHIN- target + // regions. Hence, comparing two target regions w.r.t. their groups has no + // effect on pass or fail. 
+ + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Deactivate asserter, effectively ignoring notifications + SeqAsserter->setActive(false); + ASSERT_EQ(SeqAsserter->isActive(), false); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + + // Assertion was deactivated: No change + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Re-activate asserter + SeqAsserter->setActive(true); + ASSERT_EQ(SeqAsserter->isActive(), true); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // Actually observe a target event from "AnotherGroup" + auto AnotherObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"AnotherGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(AnotherObservedEvent)); + + // Observed all expected events; groups of target regions do not affect pass + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, CheckSyncPoint) { + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", 
/*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + SeqAsserter->notify(OAE::AssertionSyncPoint( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::always, + /*SyncPointName=*/"SyncPoint 1")); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + + // All events processed: SyncPoint "passes" + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + auto AnotherExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + SeqAsserter->insert(std::move(AnotherExpectedEvent)); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Remaining events present: SyncPoint "fails" + SeqAsserter->notify(OAE::AssertionSyncPoint( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::always, + /*SyncPointName=*/"SyncPoint 2")); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckExcessNotify) { + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + 
/*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + // All events processed: pass + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + // Target events are not ignored by default + auto AnotherObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + + // No more events expected: notify "fails" + SeqAsserter->notify(std::move(AnotherObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckSuspend) { + SeqAsserter->insert(OAE::AssertionSuspend( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::never)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Being notified while the next expected event is a "suspend" should change + // the asserter's state + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + SeqAsserter->notify(OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7)); + ASSERT_EQ(SeqAsserter->AssertionSuspended, true); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", 
/*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + + // Being notified with an observed event, which matches the next expected + // event, resumes assertion (suspended = false) + ASSERT_EQ(SeqAsserter->AssertionSuspended, true); + SeqAsserter->notify(OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr)); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} diff --git a/openmp/tools/omptest/test/unittests/internal-event-test.cpp b/openmp/tools/omptest/test/unittests/internal-event-test.cpp new file mode 100644 index 0000000000000..a53025460c7e0 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/internal-event-test.cpp @@ -0,0 +1,530 @@ +#include "InternalEvent.h" +#include +#include + +#include "gtest/gtest.h" + +using namespace omptest; + +TEST(InternalEvent_toString, AssertionSyncPoint) { + internal::AssertionSyncPoint SP{/*Name=*/"Test Sync Point"}; + + EXPECT_EQ(SP.toString(), "Assertion SyncPoint: 'Test Sync Point'"); +} + +TEST(InternalEvent_toString, ThreadBegin) { + internal::ThreadBegin TB{/*ThreadType=*/ompt_thread_t::ompt_thread_initial}; + + EXPECT_EQ(TB.toString(), "OMPT Callback ThreadBegin: ThreadType=1"); +} + +TEST(InternalEvent_toString, ThreadEnd) { + internal::ThreadEnd TE{}; + + EXPECT_EQ(TE.toString(), "OMPT Callback ThreadEnd"); +} + +TEST(InternalEvent_toString, ParallelBegin) { + internal::ParallelBegin PB{/*NumThreads=*/31}; + + EXPECT_EQ(PB.toString(), "OMPT Callback ParallelBegin: NumThreads=31"); +} + +TEST(InternalEvent_toString, 
ParallelEnd) { + internal::ParallelEnd PE{/*ParallelData=*/(ompt_data_t *)0x11, + /*EncounteringTaskData=*/(ompt_data_t *)0x22, + /*Flags=*/31, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(PE.toString(), "OMPT Callback ParallelEnd"); +} + +TEST(InternalEvent_toString, Work) { + internal::Work WK{/*WorkType=*/ompt_work_t::ompt_work_loop_dynamic, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_beginend, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Count=*/31, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(WK.toString(), + "OMPT Callback Work: work_type=11 endpoint=3 parallel_data=0x11 " + "task_data=0x22 count=31 codeptr=0x33"); +} + +TEST(InternalEvent_toString, Dispatch_iteration) { + ompt_data_t DI{.value = 31}; + internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_iteration, + /*Instance=*/DI}; + + EXPECT_EQ(D.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=1 instance=[it=31]"); +} + +TEST(InternalEvent_toString, Dispatch_section) { + ompt_data_t DI{.ptr = (void *)0x33}; + internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_section, + /*Instance=*/DI}; + + EXPECT_EQ(D.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=2 instance=[ptr=0x33]"); +} + +TEST(InternalEvent_toString, Dispatch_chunks) { + ompt_dispatch_chunk_t DC{.start = 7, .iterations = 31}; + ompt_data_t DI{.ptr = (void *)&DC}; + + internal::Dispatch DLoop{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_ws_loop_chunk, + /*Instance=*/DI}; + + internal::Dispatch DTask{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_taskloop_chunk, + /*Instance=*/DI}; + + internal::Dispatch DDist{ + 
/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_distribute_chunk, + /*Instance=*/DI}; + + ompt_data_t DINull{.ptr = nullptr}; + internal::Dispatch DDistNull{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_distribute_chunk, + /*Instance=*/DINull}; + + EXPECT_EQ(DLoop.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=3 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DTask.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=4 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DDist.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=5 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DDistNull.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=5"); +} + +TEST(InternalEvent_toString, TaskCreate) { + internal::TaskCreate TC{/*EncounteringTaskData=*/(ompt_data_t *)0x11, + /*EncounteringTaskFrame=*/(const ompt_frame_t *)0x22, + /*NewTaskData=*/(ompt_data_t *)0x33, + /*Flags=*/7, + /*HasDependences=*/31, + /*CodeptrRA=*/(const void *)0x44}; + + EXPECT_EQ(TC.toString(), + "OMPT Callback TaskCreate: encountering_task_data=0x11 " + "encountering_task_frame=0x22 new_task_data=0x33 flags=7 " + "has_dependences=31 codeptr=0x44"); +} + +TEST(InternalEvent_toString, ImplicitTask) { + internal::ImplicitTask IT{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*ActualParallelism=*/7, + /*Index=*/31, + /*Flags=*/127}; + + EXPECT_EQ(IT.toString(), + "OMPT Callback ImplicitTask: endpoint=1 parallel_data=0x11 " + "task_data=0x22 actual_parallelism=7 index=31 flags=127"); +} + +TEST(InternalEvent_toString, SyncRegion) { + internal::SyncRegion SR{ + /*Kind=*/ompt_sync_region_t::ompt_sync_region_taskwait, + 
/*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_end, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(SR.toString(), "OMPT Callback SyncRegion: kind=5 endpoint=2 " + "parallel_data=0x11 task_data=0x22 codeptr=0x33"); +} + +TEST(InternalEvent_toString, Target) { + internal::Target T{/*Kind=*/ompt_target_t::ompt_target_enter_data_nowait, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_end, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)0x11, + /*TargetId=*/(ompt_id_t)31, + /*CodeptrRA=*/(const void *)0x22}; + + EXPECT_EQ(T.toString(), "Callback Target: target_id=31 kind=10 " + "endpoint=2 device_num=7 code=0x22"); +} + +TEST(InternalEvent_toString, TargetEmi) { + ompt_data_t TaskData{.value = 31}; + ompt_data_t TargetTaskData{.value = 127}; + ompt_data_t TargetData{.value = 8191}; + + internal::TargetEmi T{/*Kind=*/ompt_target_t::ompt_target_update, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)&TaskData, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*CodeptrRA=*/(const void *)0x11}; + + internal::TargetEmi TDataNull{ + /*Kind=*/ompt_target_t::ompt_target_update, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)&TaskData, + /*TargetTaskData=*/(ompt_data_t *)nullptr, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*CodeptrRA=*/(const void *)0x11}; + + std::ostringstream StreamT1; + std::ostringstream StreamT2; + std::string CallBackPrefix{ + "Callback Target EMI: kind=4 endpoint=1 device_num=7"}; + StreamT1 << CallBackPrefix << std::showbase << std::hex; + StreamT1 << " task_data=" << &TaskData << " (0x1f)"; + StreamT1 << " target_task_data=" << &TargetTaskData << " (0x7f)"; + StreamT1 << " target_data=" << &TargetData << " (0x1fff)"; + StreamT1 << " code=0x11"; + + StreamT2 << CallBackPrefix << std::showbase << std::hex; 
+ StreamT2 << " task_data=" << &TaskData << " (0x1f)"; + StreamT2 << " target_task_data=(nil) (0x0)"; + StreamT2 << " target_data=" << &TargetData << " (0x1fff)"; + StreamT2 << " code=0x11"; + + EXPECT_EQ(T.toString(), StreamT1.str()); + EXPECT_EQ(TDataNull.toString(), StreamT2.str()); +} + +TEST(InternalEvent_toString, TargetDataOp) { + internal::TargetDataOp TDO{ + /*TargetId=*/7, + /*HostOpId=*/31, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_associate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/127, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/8191, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ( + TDO.toString(), + " Callback DataOp: target_id=7 host_op_id=31 optype=5 src=0x11 " + "src_device_num=127 dest=0x22 dest_device_num=8191 bytes=4096 code=0x33"); +} + +TEST(InternalEvent_toString, TargetDataOpEmi) { + ompt_data_t TargetTaskData{.value = 31}; + ompt_data_t TargetData{.value = 127}; + ompt_id_t HostOpId = 8191; + + internal::TargetDataOpEmi TDO{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)&HostOpId, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_disassociate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/1, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/2, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + // Set HostOpId=nullptr + internal::TargetDataOpEmi TDO_HostOpIdNull{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)nullptr, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_disassociate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/1, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/2, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + std::ostringstream StreamTDO1; + std::ostringstream StreamTDO2; + std::string 
CallBackPrefix{" Callback DataOp EMI: endpoint=1 optype=6"}; + std::string CallBackSuffix{ + " src=0x11 src_device_num=1 dest=0x22 dest_device_num=2 " + "bytes=4096 code=0x33"}; + StreamTDO1 << CallBackPrefix << std::showbase << std::hex; + StreamTDO1 << " target_task_data=" << &TargetTaskData << " (0x1f)"; + StreamTDO1 << " target_data=" << &TargetData << " (0x7f)"; + StreamTDO1 << " host_op_id=" << &HostOpId << " (0x1fff)"; + StreamTDO1 << CallBackSuffix; + + StreamTDO2 << CallBackPrefix << std::showbase << std::hex; + StreamTDO2 << " target_task_data=" << &TargetTaskData << " (0x1f)"; + StreamTDO2 << " target_data=" << &TargetData << " (0x7f)"; + StreamTDO2 << " host_op_id=(nil) (0x0)"; + StreamTDO2 << CallBackSuffix; + + EXPECT_EQ(TDO.toString(), StreamTDO1.str()); + EXPECT_EQ(TDO_HostOpIdNull.toString(), StreamTDO2.str()); +} + +TEST(InternalEvent_toString, TargetSubmit) { + internal::TargetSubmit TS{/*TargetId=*/7, + /*HostOpId=*/31, + /*RequestedNumTeams=*/127}; + + EXPECT_EQ(TS.toString(), + " Callback Submit: target_id=7 host_op_id=31 req_num_teams=127"); +} + +TEST(InternalEvent_toString, TargetSubmitEmi) { + ompt_data_t TargetData{.value = 127}; + ompt_id_t HostOpId = 8191; + internal::TargetSubmitEmi TS{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)&HostOpId, + /*RequestedNumTeams=*/7}; + + std::ostringstream StreamTS; + std::string CallBackPrefix{ + " Callback Submit EMI: endpoint=1 req_num_teams=7"}; + StreamTS << CallBackPrefix << std::showbase << std::hex; + StreamTS << " target_data=" << &TargetData << " (0x7f)"; + StreamTS << " host_op_id=" << &HostOpId << " (0x1fff)"; + + EXPECT_EQ(TS.toString(), StreamTS.str()); +} + +TEST(InternalEvent_toString, DeviceInitialize) { + const char *Type = "DeviceType"; + const char *DocStr = "DocumentationString"; + + internal::DeviceInitialize DI{/*DeviceNum=*/7, + /*Type=*/Type, + /*Device=*/(ompt_device_t *)0x11, + 
/*LookupFn=*/(ompt_function_lookup_t)0x22, + /*DocStr=*/DocStr}; + + internal::DeviceInitialize DINull{/*DeviceNum=*/0, + /*Type=*/nullptr, + /*Device=*/nullptr, + /*LookupFn=*/(ompt_function_lookup_t)0x0, + /*DocStr=*/nullptr}; + + std::ostringstream StreamDI; + std::string CallBackPrefix{"Callback Init: device_num=7 type=DeviceType " + "device=0x11 lookup=0x22 doc="}; + StreamDI << CallBackPrefix << std::showbase << std::hex; + StreamDI << (uint64_t)DocStr; + EXPECT_EQ(DI.toString(), StreamDI.str()); + + // TODO This looks inconsistent: (null) vs. (nil) + EXPECT_EQ(DINull.toString(), "Callback Init: device_num=0 type=(null) " + "device=(nil) lookup=(nil) doc=(nil)"); +} + +TEST(InternalEvent_toString, DeviceFinalize) { + internal::DeviceFinalize DF{/*DeviceNum=*/7}; + + EXPECT_EQ(DF.toString(), "Callback Fini: device_num=7"); +} + +TEST(InternalEvent_toString, DeviceLoad) { + const char *Filename = "FilenameToLoad"; + + internal::DeviceLoad DL{/*DeviceNum=*/7, + /*Filename=*/Filename, + /*OffsetInFile=*/31, + /*VmaInFile=*/(void *)0x11, + /*Bytes=*/127, + /*HostAddr=*/(void *)0x22, + /*DeviceAddr=*/(void *)0x33, + /*ModuleId=*/8191}; + + internal::DeviceLoad DLNull{/*DeviceNum=*/0, + /*Filename=*/nullptr, + /*OffsetInFile=*/0, + /*VmaInFile=*/nullptr, + /*Bytes=*/0, + /*HostAddr=*/nullptr, + /*DeviceAddr=*/nullptr, + /*ModuleId=*/0}; + + EXPECT_EQ( + DL.toString(), + "Callback Load: device_num:7 module_id:8191 " + "filename:FilenameToLoad host_adddr:0x22 device_addr:0x33 bytes:127"); + + // TODO This looks inconsistent: (null) vs. 
(nil) and ':' instead of '=' + EXPECT_EQ(DLNull.toString(), + "Callback Load: device_num:0 module_id:0 filename:(null) " + "host_adddr:(nil) device_addr:(nil) bytes:0"); +} + +TEST(InternalEvent_toString, BufferRequest) { + size_t Bytes = 7; + ompt_buffer_t *Buffer = (void *)0x11; + + internal::BufferRequest BR{/*DeviceNum=*/31, + /*Buffer=*/&Buffer, + /*Bytes=*/&Bytes}; + + internal::BufferRequest BRNull{/*DeviceNum=*/127, + /*Buffer=*/nullptr, + /*Bytes=*/nullptr}; + + EXPECT_EQ(BR.toString(), + "Allocated 7 bytes at 0x11 in buffer request callback"); + EXPECT_EQ(BRNull.toString(), + "Allocated 0 bytes at (nil) in buffer request callback"); +} + +TEST(InternalEvent_toString, BufferComplete) { + ompt_buffer_t *Buffer = (void *)0x11; + + internal::BufferComplete BC{/*DeviceNum=*/7, + /*Buffer=*/Buffer, + /*Bytes=*/127, + /*Begin=*/8191, + /*BufferOwned=*/1}; + + internal::BufferComplete BCNull{/*DeviceNum=*/0, + /*Buffer=*/nullptr, + /*Bytes=*/0, + /*Begin=*/0, + /*BufferOwned=*/0}; + + EXPECT_EQ(BC.toString(), + "Executing buffer complete callback: 7 0x11 127 0x1fff 1"); + EXPECT_EQ(BCNull.toString(), + "Executing buffer complete callback: 0 (nil) 0 (nil) 0"); +} + +TEST(InternalEvent_toString, BufferRecordInvalid) { + ompt_record_ompt_t InvalidRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_parallel_begin, + /*time=*/7, + /*thread_id=*/31, + /*target_id=*/127, + /*record=*/{.parallel_begin = {}}}; + + internal::BufferRecord BRNull{/*RecordPtr=*/nullptr}; + internal::BufferRecord BRInvalid{/*RecordPtr=*/&InvalidRecord}; + + std::ostringstream StreamBRInvalid; + StreamBRInvalid << "rec=" << std::showbase << std::hex << &InvalidRecord; + StreamBRInvalid << " type=3 (unsupported record type)"; + + EXPECT_EQ(BRNull.toString(), "rec=(nil) type=0 (unsupported record type)"); + EXPECT_EQ(BRInvalid.toString(), StreamBRInvalid.str()); +} + +TEST(InternalEvent_toString, BufferRecordTarget) { + ompt_record_target_t SubRecordTarget{ + 
/*kind=*/ompt_target_t::ompt_target_update, + /*endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*device_num=*/2, + /*task_id=*/127, + /*target_id=*/31, + /*codeptr_ra=*/(const void *)0x11}; + + ompt_record_ompt_t TargetRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target, + /*time=*/7, + /*thread_id=*/29, + /*target_id=*/31, + /*record*/ {.target = SubRecordTarget}}; + + internal::BufferRecord BR{/*RecordPtr=*/&TargetRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &TargetRecord; + StreamBR << " type=8 (Target task) time=7 thread_id=29 target_id=31 kind=4"; + StreamBR << " endpoint=1 device=2 task_id=127 codeptr=0x11"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordDataOp) { + ompt_record_target_data_op_t SubRecordTargetDataOp{ + /*host_op_id=*/7, + /*optype=*/ompt_target_data_op_t::ompt_target_data_alloc_async, + /*src_addr=*/(void *)0x11, + /*src_device_num=*/1, + /*dest_addr=*/(void *)0x22, + /*dest_device_num=*/2, + /*bytes=*/127, + /*end_time=*/128, + /*codeptr_ra=*/(const void *)0x33, + }; + + ompt_record_ompt_t DataOpRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target_data_op_emi, + /*time=*/8, + /*thread_id=*/3, + /*target_id=*/5, + /*record=*/{.target_data_op = SubRecordTargetDataOp}}; + + internal::BufferRecord BR{/*RecordPtr=*/&DataOpRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &DataOpRecord; + StreamBR << " type=34 (Target data op) time=8 thread_id=3 target_id=5"; + StreamBR << " host_op_id=7 optype=17 src_addr=0x11 src_device=1"; + StreamBR << " dest_addr=0x22 dest_device=2 bytes=127 end_time=128"; + StreamBR << " duration=120 ns codeptr=0x33"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordKernel) { + ompt_record_target_kernel_t SubRecordTargetKernel{ + /*host_op_id=*/11, + /*requested_num_teams=*/127, + /*granted_num_teams=*/63, + 
/*end_time=*/8191, + }; + + ompt_record_ompt_t KernelRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target_submit_emi, + /*time=*/9, + /*thread_id=*/19, + /*target_id=*/33, + /*record=*/{.target_kernel = SubRecordTargetKernel}}; + + internal::BufferRecord BR{/*RecordPtr=*/&KernelRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &KernelRecord; + StreamBR << " type=35 (Target kernel) time=9 thread_id=19 target_id=33"; + StreamBR << " host_op_id=11 requested_num_teams=127 granted_num_teams=63"; + StreamBR << " end_time=8191 duration=8182 ns"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordDeallocation) { + internal::BufferRecordDeallocation BRD{/*Buffer=*/(ompt_record_ompt_t *)0x11}; + internal::BufferRecordDeallocation BRDNull{/*Buffer=*/nullptr}; + + EXPECT_EQ(BRD.toString(), "Deallocated 0x11"); + EXPECT_EQ(BRDNull.toString(), "Deallocated (nil)"); +} diff --git a/openmp/tools/omptest/test/unittests/internal-util-test.cpp b/openmp/tools/omptest/test/unittests/internal-util-test.cpp new file mode 100644 index 0000000000000..6a9868b85c3a3 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/internal-util-test.cpp @@ -0,0 +1,95 @@ +#include "InternalEvent.h" +#include + +#include "gtest/gtest.h" + +using namespace omptest; + +TEST(InternalUtility, ExpectedDefault_Integer) { + // int: -2147483648 (decimal) = 0x80000000 (hexadecimal) + EXPECT_EQ(expectedDefault(int), 0x80000000); + EXPECT_EQ(expectedDefault(int), (0x1 << 31)); + // int64_t: -9223372036854775808 (decimal) = 0x8000000000000000 (hexadecimal) + EXPECT_EQ(expectedDefault(int64_t), 0x8000000000000000); + EXPECT_EQ(expectedDefault(int64_t), (0x1L << 63)); +} + +TEST(InternalUtility, ExpectedDefault_Zero) { + // Expectedly zero + EXPECT_EQ(expectedDefault(size_t), 0); + EXPECT_EQ(expectedDefault(unsigned int), 0); + EXPECT_EQ(expectedDefault(ompt_id_t), 0); + EXPECT_EQ(expectedDefault(ompt_dispatch_t), 0); + 
EXPECT_EQ(expectedDefault(ompt_device_time_t), 0); +} + +TEST(InternalUtility, ExpectedDefault_Nullpointer) { + // Expectedly nullptr + EXPECT_EQ(expectedDefault(const char *), nullptr); + EXPECT_EQ(expectedDefault(const void *), nullptr); + EXPECT_EQ(expectedDefault(int *), nullptr); + EXPECT_EQ(expectedDefault(void *), nullptr); + EXPECT_EQ(expectedDefault(ompt_data_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_device_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_frame_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_function_lookup_t), nullptr); + EXPECT_EQ(expectedDefault(ompt_id_t *), nullptr); +} + +TEST(InternalUtility, MakeHexString_PointerValues) { + // IsPointer should only affect zero value + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/true), "(nil)"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false), "0x0"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true), "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/false), "0xff"); +} + +TEST(InternalUtility, MakeHexString_MinimumBytes) { + // Return a minimum length, based on the (minimum) requested bytes + EXPECT_EQ(util::makeHexString(15, /*IsPointer=*/true, /*MinBytes=*/0), "0xf"); + EXPECT_EQ(util::makeHexString(15, /*IsPointer=*/true, /*MinBytes=*/1), + "0x0f"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/0), + "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/1), + "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/2), + "0x00ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/3), + "0x0000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/4), + "0x000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/5), + "0x00000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/6), + "0x0000000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/7), + "0x000000000000ff"); + 
EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/8), + "0x00000000000000ff"); + + // Default to four bytes, if request exceeds eight byte range + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/9), + "0x000000ff"); + + // Disregard requested minimum byte width, if actual value exceeds it + EXPECT_EQ(util::makeHexString(1024, /*IsPointer=*/true, /*MinBytes=*/1), + "0x400"); +} + +TEST(InternalUtility, MakeHexString_HexBase) { + // Cut off "0x" when requested + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/true, /*MinBytes=*/0, + /*ShowHexBase=*/false), + "(nil)"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false, /*MinBytes=*/0, + /*ShowHexBase=*/false), + "0"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false, /*MinBytes=*/1, + /*ShowHexBase=*/false), + "00"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, + /*MinBytes=*/2, + /*ShowHexBase=*/false), + "00ff"); +} diff --git a/openmp/tools/omptest/test/unittests/main-test.cpp b/openmp/tools/omptest/test/unittests/main-test.cpp new file mode 100644 index 0000000000000..2eba663e49c8e --- /dev/null +++ b/openmp/tools/omptest/test/unittests/main-test.cpp @@ -0,0 +1,141 @@ +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptTester.h" +#include + +#include "gtest/gtest.h" + +using OS = omptest::ObserveState; +using OAE = omptest::OmptAssertEvent; + +TEST(CompareOperatorTests, ThreadBeginIdentity) { + auto TBInitial = + OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_initial); + auto TBWorker = OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_worker); + auto TBOther = OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_other); + auto TBUnknown = + OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_unknown); + + ASSERT_EQ(TBInitial, TBInitial); + ASSERT_EQ(TBWorker, TBWorker); + ASSERT_EQ(TBOther, TBOther); + ASSERT_EQ(TBUnknown, TBUnknown); +} + +TEST(CompareOperatorTests, ThreadEndIdentity) { + auto TE = OAE::ThreadEnd("dflt", 
"", OS::always); + + ASSERT_EQ(TE, TE); +} + +TEST(CompareOperatorTests, ParallelBeginIdentity) { + auto PBNumT = OAE::ParallelBegin("thrdenable", "", OS::always, 3); + + ASSERT_EQ(PBNumT, PBNumT); +} + +TEST(CompareOperatorTests, ParallelEndIdentity) { + auto PEDflt = OAE::ParallelEnd("dflt", "", OS::always); + // TODO: Add cases with parallel data set, task data set, flags + + ASSERT_EQ(PEDflt, PEDflt); +} + +TEST(CompareOperatorTests, WorkIdentity) { + auto WDLoopBgn = + OAE::Work("loopbgn", "", OS::always, ompt_work_loop, ompt_scope_begin); + auto WDLoopEnd = + OAE::Work("loobend", "", OS::always, ompt_work_loop, ompt_scope_end); + + ASSERT_EQ(WDLoopBgn, WDLoopBgn); + ASSERT_EQ(WDLoopEnd, WDLoopEnd); + + auto WDSectionsBgn = OAE::Work("sectionsbgn", "", OS::always, + ompt_work_sections, ompt_scope_begin); + auto WDSectionsEnd = OAE::Work("sectionsend", "", OS::always, + ompt_work_sections, ompt_scope_end); + + // TODO: singleexecutor, single_other, workshare, distribute, taskloop, scope, + // loop_static, loop_dynamic, loop_guided, loop_other + + ASSERT_EQ(WDSectionsBgn, WDSectionsBgn); + ASSERT_EQ(WDSectionsEnd, WDSectionsEnd); +} + +TEST(CompareOperatorTests, DispatchIdentity) { + auto DIDflt = OAE::Dispatch("dflt", "", OS::always); + + ASSERT_EQ(DIDflt, DIDflt); +} + +TEST(CompareOperatorTests, TaskCreateIdentity) { + auto TCDflt = OAE::TaskCreate("dflt", "", OS::always); + + ASSERT_EQ(TCDflt, TCDflt); +} + +TEST(CompareOperatorTests, TaskScheduleIdentity) { + auto TS = OAE::TaskSchedule("dflt", "", OS::always); + + ASSERT_EQ(TS, TS); +} + +TEST(CompareOperatorTests, ImplicitTaskIdentity) { + auto ITDfltBgn = + OAE::ImplicitTask("dfltbgn", "", OS::always, ompt_scope_begin); + auto ITDfltEnd = OAE::ImplicitTask("dfltend", "", OS::always, ompt_scope_end); + + ASSERT_EQ(ITDfltBgn, ITDfltBgn); + ASSERT_EQ(ITDfltEnd, ITDfltEnd); +} + +TEST(CompareOperatorTests, SyncRegionIdentity) { + auto SRDfltBgn = + OAE::SyncRegion("srdfltbgn", "", OS::always, + 
ompt_sync_region_barrier_explicit, ompt_scope_begin); + auto SRDfltEnd = + OAE::SyncRegion("srdfltend", "", OS::always, + ompt_sync_region_barrier_explicit, ompt_scope_end); + + ASSERT_EQ(SRDfltBgn, SRDfltBgn); + ASSERT_EQ(SRDfltEnd, SRDfltEnd); +} + +TEST(CompareOperatorTests, TargetIdentity) { + auto TargetDfltBgn = + OAE::Target("dfltbgn", "", OS::always, ompt_target, ompt_scope_begin); + auto TargetDfltEnd = + OAE::Target("dfltend", "", OS::always, ompt_target, ompt_scope_end); + + ASSERT_EQ(TargetDfltBgn, TargetDfltBgn); + ASSERT_EQ(TargetDfltEnd, TargetDfltEnd); + + auto TargetDevBgn = OAE::Target("tgtdevbgn", "", OS::always, ompt_target, + ompt_scope_begin, 1); + auto TargetDevEnd = + OAE::Target("tgtdevend", "", OS::always, ompt_target, ompt_scope_end, 1); + + ASSERT_EQ(TargetDevBgn, TargetDevBgn); + ASSERT_EQ(TargetDevEnd, TargetDevEnd); +} + +TEST(CompareOperatorTests, BufferRecordIdentity) { + // Default, no time limit or anything + auto BRDflt = + OAE::BufferRecord("dflt", "", OS::always, ompt_callback_target_submit); + + // Minimum time set, no max time + auto BRMinSet = OAE::BufferRecord("minset", "", OS::always, + ompt_callback_target_submit, 10); + + // Minimum time and maximum time set + auto BRMinMaxSet = OAE::BufferRecord("minmaxset", "", OS::always, + ompt_callback_target_submit, {10, 100}); + + ASSERT_EQ(BRDflt, BRDflt); + ASSERT_EQ(BRMinSet, BRMinSet); + ASSERT_EQ(BRMinMaxSet, BRMinMaxSet); +} + +// Add main definition +OMPTEST_TESTSUITE_MAIN() From openmp-commits at lists.llvm.org Tue Jul 8 05:47:32 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 08 Jul 2025 05:47:32 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686d1364.170a0220.330c4d.cce2@mx.google.com> mhalk wrote: > It seems we forgot to add the appropriate license statement in the source files. Good catch, thanks for pointing that out. 
It should be corrected now :) https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 8 06:30:00 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Tue, 08 Jul 2025 06:30:00 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686d1d58.050a0220.1d1e70.f02d@mx.google.com> jplehr wrote: Maybe as an additional comment, given that it may not be obvious from the initial message: This is the first part of our efforts to upstream our OMPT device-tracing support from OpenMP offloading. https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 8 07:07:38 2025 From: openmp-commits at lists.llvm.org (Robert Imschweiler via Openmp-commits) Date: Tue, 08 Jul 2025 07:07:38 -0700 (PDT) Subject: [Openmp-commits] [openmp] Reland: [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #147532) Message-ID: https://github.com/ro-i created https://github.com/llvm/llvm-project/pull/147532 OpenMP 6.0 12.1.2 specifies the behavior of the strict modifier for the num_threads clause on parallel directives, along with the message and severity clauses. This commit implements necessary host runtime changes. Reland https://github.com/llvm/llvm-project/pull/146403. After manual testing on a gfx90a machine, I could not reproduce the failing test, which makes it even more likely that the test has just been flaky. (Or at least that it's not an issue related to this patch.) >From b3e7d2eb642b67ab8fbfeeddd8e9178170b1a3dc Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Fri, 27 Jun 2025 07:54:07 -0500 Subject: [PATCH] [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) OpenMP 6.0 12.1.2 specifies the behavior of the strict modifier for the num_threads clause on parallel directives, along with the message and severity clauses. This commit implements necessary host runtime changes. 
--- openmp/runtime/src/kmp_runtime.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..6afea9b994de4 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1214,6 +1214,12 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { // Reset for next parallel region this_thr->th.th_set_proc_bind = proc_bind_default; + // OpenMP 6.0 12.1.2 requires the num_threads 'strict' modifier to also have + // effect when parallel execution is disabled by a corresponding if clause + // attached to the parallel directive. + if (this_thr->th.th_nt_strict && this_thr->th.th_set_nproc > 1) + __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev, + this_thr->th.th_nt_msg); // Reset num_threads for next parallel region this_thr->th.th_set_nproc = 0; From openmp-commits at lists.llvm.org Tue Jul 8 07:10:52 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 07:10:52 -0700 (PDT) Subject: [Openmp-commits] [openmp] Reland: [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #147532) In-Reply-To: Message-ID: <686d26ec.630a0220.9fc0e.2032@mx.google.com> https://github.com/jprotze approved this pull request. Lgtm https://github.com/llvm/llvm-project/pull/147532 From openmp-commits at lists.llvm.org Tue Jul 8 07:15:29 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 07:15:29 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686d2801.170a0220.35778e.0e1f@mx.google.com> jprotze wrote: Am I right, that the unittests in this PR are to test omptest itself, and not some specific event sequences from the OpenMP runtime library? 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 8 07:29:43 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Tue, 08 Jul 2025 07:29:43 -0700 (PDT) Subject: [Openmp-commits] [openmp] Reland: [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #147532) In-Reply-To: Message-ID: <686d2b57.050a0220.23eff.d5c2@mx.google.com> https://github.com/jplehr approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/147532 From openmp-commits at lists.llvm.org Tue Jul 8 07:39:35 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 08 Jul 2025 07:39:35 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <686d2da7.050a0220.28c530.4426@mx.google.com> mhalk wrote: > Am I right, that the unittests in this PR are to test omptest itself, and not some specific event sequences from the OpenMP runtime library? Yes, correct. Currently, these unit tests, while not exhaustive, provide some basic coverage and a starting point for further testing. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 8 08:03:39 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 08:03:39 -0700 (PDT) Subject: [Openmp-commits] [clang] [llvm] [openmp] [clang][OpenMP] New OpenMP 6.0 threadset clause (PR #135807) In-Reply-To: Message-ID: <686d334b.170a0220.e138a.477c@mx.google.com> https://github.com/Ritanya-B-Bharadwaj updated https://github.com/llvm/llvm-project/pull/135807 >From 9c56e59ba9984c14c15a8d5a95a02e7192a64e8f Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Sun, 6 Apr 2025 09:33:06 -0500 Subject: [PATCH 1/7] [OpenMP] Parsing Support of ThreadSets in Task --- clang/include/clang/AST/OpenMPClause.h | 80 +++++++++++++++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 6 ++ clang/include/clang/Basic/OpenMPKinds.def | 8 +- clang/include/clang/Basic/OpenMPKinds.h | 7 ++ clang/include/clang/Sema/SemaOpenMP.h | 6 ++ clang/lib/AST/OpenMPClause.cpp | 7 ++ clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 9 +++ clang/lib/Parse/ParseOpenMP.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 21 +++++ clang/lib/Sema/TreeTransform.h | 7 ++ clang/lib/Serialization/ASTReader.cpp | 11 +++ clang/lib/Serialization/ASTWriter.cpp | 6 ++ clang/tools/libclang/CIndex.cpp | 2 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 + 15 files changed, 176 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 572e62249b46f..81420384f885c 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1332,6 +1332,86 @@ class OMPDefaultClause : public OMPClause { } }; +/// This represents 'threadset' clause in the '#pragma omp ...' directive. +/// +/// \code +/// #pragma omp parallel threadset(shared) +/// \endcode +/// In this example directive '#pragma omp parallel' has simple 'threadset' +/// clause with kind 'shared'. 
+class OMPThreadsetClause : public OMPClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// A kind of the 'threadset' clause. + OpenMPThreadsetKind Kind = OMPC_THREADSET_unknown; + + /// Start location of the kind in source code. + SourceLocation KindLoc; + + /// Set kind of the clauses. + /// + /// \param K Argument of clause. + void setThreadsetKind(OpenMPThreadsetKind K) { Kind = K; } + + /// Set argument location. + /// + /// \param KLoc Argument location. + void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } + +public: + /// Build 'threadset' clause with argument \a A ('none' or 'shared'). + /// + /// \param A Argument of the clause ('none' or 'shared'). + /// \param ALoc Starting location of the argument. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPThreadsetClause(OpenMPThreadsetKind A, SourceLocation ALoc, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_threadset, StartLoc, EndLoc), + LParenLoc(LParenLoc), Kind(A), KindLoc(ALoc) {} + + /// Build an empty clause. + OMPThreadsetClause() + : OMPClause(llvm::omp::OMPC_threadset, SourceLocation(), + SourceLocation()) {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns kind of the clause. + OpenMPThreadsetKind getThreadsetKind() const { return Kind; } + + /// Returns location of clause kind. 
+ SourceLocation getThreadsetKindLoc() const { return KindLoc; } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_threadset; + } +}; + /// This represents 'proc_bind' clause in the '#pragma omp ...' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 0530996ed20d3..d86c7d4577ac6 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3410,6 +3410,12 @@ bool RecursiveASTVisitor::VisitOMPDefaultClause(OMPDefaultClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPThreadsetClause( + OMPThreadsetClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPProcBindClause(OMPProcBindClause *) { return true; diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index b0de65df7e397..5b8889b8f7a34 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -92,6 +92,9 @@ #ifndef OPENMP_ALLOCATE_MODIFIER #define OPENMP_ALLOCATE_MODIFIER(Name) #endif +#ifndef OPENMP_THREADSET_KIND +#define OPENMP_THREADSET_KIND(Name) +#endif // Static attributes for 'schedule' clause. 
OPENMP_SCHEDULE_KIND(static) @@ -236,6 +239,9 @@ OPENMP_DOACROSS_MODIFIER(sink) OPENMP_DOACROSS_MODIFIER(sink_omp_cur_iteration) OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) +OPENMP_THREADSET_KIND(omp_pool) +OPENMP_THREADSET_KIND(omp_team) + #undef OPENMP_NUMTASKS_MODIFIER #undef OPENMP_GRAINSIZE_MODIFIER #undef OPENMP_BIND_KIND @@ -263,4 +269,4 @@ OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) #undef OPENMP_DEFAULTMAP_MODIFIER #undef OPENMP_DOACROSS_MODIFIER #undef OPENMP_ALLOCATE_MODIFIER - +#undef OPENMP_THREADSET_KIND diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6ca9f9c550285..e93e4bdbfb7d7 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -237,6 +237,13 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; +/// OpenMP modifiers for 'allocate' clause. +enum OpenMPThreadsetKind { +#define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_THREADSET_unknown +}; + /// Number of allowed allocate-modifiers. static constexpr unsigned NumberOfOMPAllocateClauseModifiers = OMPC_ALLOCATE_unknown; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 6498390fe96f7..d6a0167177f12 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -955,6 +955,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'threadset' clause. + OMPClause *ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'proc_bind' clause. 
OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, SourceLocation KindLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 2226791a70b6e..85f9c1ab47ae8 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1913,6 +1913,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) { << ")"; } +void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) { + OS << "threadset(" + << getOpenMPSimpleClauseTypeName(OMPC_threadset, + unsigned(Node->getThreadsetKind())) + << ")"; +} + void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) { OS << "proc_bind(" << getOpenMPSimpleClauseTypeName(OMPC_proc_bind, diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 83d54da9be7e5..5b18d1bf4019d 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -540,6 +540,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) { void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) { } +void OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {} + void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { } void OMPClauseProfiler::VisitOMPUnifiedAddressClause( diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 09921e3b1edfc..b17a3b14a5ab2 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -185,6 +185,15 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, #define OPENMP_ALLOCATE_MODIFIER(Name) .Case(#Name, OMPC_ALLOCATE_##Name) #include "clang/Basic/OpenMPKinds.def" .Default(OMPC_ALLOCATE_unknown); + case OMPC_threadset: { + unsigned Type = llvm::StringSwitch(Str) +#define OPENMP_THREADSET_KIND(Name) .Case(#Name, OMPC_THREADSET_##Name) +#include "clang/Basic/OpenMPKinds.def" + .Default(OMPC_THREADSET_unknown); + if (LangOpts.OpenMP < 60) 
+ return OMPC_THREADSET_unknown; + return Type; + } case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index b0e6c2f07a1e7..610089affde47 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3266,6 +3266,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, else Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); break; + case OMPC_threadset: case OMPC_fail: case OMPC_default: case OMPC_proc_bind: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a382947455aef..2d57a9b54c02f 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16129,6 +16129,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( static_cast(Argument), ArgumentLoc, StartLoc, LParenLoc, EndLoc); break; + case OMPC_threadset: + Res = ActOnOpenMPThreadsetClause(static_cast(Argument), + ArgumentLoc, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: @@ -16266,6 +16270,23 @@ OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(DefaultKind Kind, OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } +OMPClause *SemaOpenMP::ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + if (Kind == OMPC_THREADSET_unknown) { + Diag(KindLoc, diag::err_omp_unexpected_clause_value) + << getListOfPossibleValues(OMPC_threadset, /*First=*/0, + /*Last=*/unsigned(OMPC_THREADSET_unknown)) + << getOpenMPClauseName(OMPC_threadset); + return nullptr; + } + + return new (getASTContext()) + OMPThreadsetClause(Kind, KindLoc, StartLoc, LParenLoc, EndLoc); +} + OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind, SourceLocation KindKwLoc, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 
3689d323cf25b..5aca6c40308bc 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10539,6 +10539,13 @@ TreeTransform::TransformOMPDefaultClause(OMPDefaultClause *C) { C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPThreadsetClause(OMPThreadsetClause *C) { + // No need to rebuild this clause, no template-dependent parameters. + return C; +} + template OMPClause * TreeTransform::TransformOMPProcBindClause(OMPProcBindClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 8e573a11efd35..957cc12aa773a 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11440,6 +11440,17 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKindKwLoc(Record.readSourceLocation()); } +// Read the parameter of fail clause. This will have been saved when +// OMPClauseWriter is called. +void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + C->setLParenLoc(Record.readSourceLocation()); + SourceLocation ThreadsetKindLoc = Record.readSourceLocation(); + C->setThreadsetKindLoc(ThreadsetKindLoc); + OpenMPThreadsetKind TKind = + static_cast(Record.readInt()); + C->setThreadsetKind(TKind); +} + void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) { C->setProcBindKind(static_cast(Record.readInt())); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 84f7f2bc5fce4..2818748e38183 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7785,6 +7785,12 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) { Record.AddSourceLocation(C->getDefaultKindKwLoc()); } +void OMPClauseWriter::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + Record.AddSourceLocation(C->getLParenLoc()); + 
Record.AddSourceLocation(C->getThreadsetKindLoc()); + Record.writeEnum(C->getThreadsetKind()); +} + void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) { Record.push_back(unsigned(C->getProcBindKind())); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 6ea6447d1d590..fc96f86df8108 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2454,6 +2454,8 @@ void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} void OMPClauseEnqueue::VisitOMPFailClause(const OMPFailClause *) {} +void OMPClauseEnqueue::VisitOMPThreadsetClause(const OMPThreadsetClause *) {} + void OMPClauseEnqueue::VisitOMPAbsentClause(const OMPAbsentClause *) {} void OMPClauseEnqueue::VisitOMPHoldsClause(const OMPHoldsClause *) {} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index e2a1449d8cc76..8c73ddc780c76 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -497,6 +497,9 @@ def OMPC_ThreadPrivate : Clause<"threadprivate"> { def OMPC_Threads : Clause<"threads"> { let clangClass = "OMPThreadsClause"; } +def OMPC_Threadset : Clause<"threadset"> { + let clangClass = "OMPThreadsetClause"; +} def OMPC_To : Clause<"to"> { let clangClass = "OMPToClause"; let flangClass = "OmpToClause"; @@ -1152,6 +1155,7 @@ def OMP_Task : Directive<"task"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; >From bcc7c388a929e49ba6805f0038a3b7cdaa475fee Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Tue, 15 Apr 2025 11:13:40 -0500 Subject: [PATCH 2/7] [clang][OpenMP] New OpenMP 6.0 threadset clause --- clang/include/clang/AST/OpenMPClause.h | 12 +-- clang/include/clang/Basic/OpenMPKinds.h | 2 +- clang/lib/AST/OpenMPClause.cpp | 1 + clang/lib/Basic/OpenMPKinds.cpp | 10 ++ 
clang/lib/Serialization/ASTReader.cpp | 2 +- clang/test/OpenMP/task_ast_print.cpp | 12 +++ clang/test/OpenMP/task_threadset_messages.cpp | 99 +++++++++++++++++++ clang/test/OpenMP/taskloop_ast_print.cpp | 16 +++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 9 files changed, 147 insertions(+), 8 deletions(-) create mode 100755 clang/test/OpenMP/task_threadset_messages.cpp diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 81420384f885c..aeaf5c292b1be 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1332,13 +1332,13 @@ class OMPDefaultClause : public OMPClause { } }; -/// This represents 'threadset' clause in the '#pragma omp ...' directive. +/// This represents 'threadset' clause in the '#pragma omp task ...' directive. /// /// \code -/// #pragma omp parallel threadset(shared) +/// #pragma omp task threadset(omp_pool) /// \endcode -/// In this example directive '#pragma omp parallel' has simple 'threadset' -/// clause with kind 'shared'. +/// In this example directive '#pragma omp task' has simple 'threadset' +/// clause with kind 'omp_pool'. class OMPThreadsetClause : public OMPClause { friend class OMPClauseReader; @@ -1362,9 +1362,9 @@ class OMPThreadsetClause : public OMPClause { void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } public: - /// Build 'threadset' clause with argument \a A ('none' or 'shared'). + /// Build 'threadset' clause with argument \a A ('omp_team' or 'omp_pool'). /// - /// \param A Argument of the clause ('none' or 'shared'). + /// \param A Argument of the clause ('omp_team' or 'omp_pool'). /// \param ALoc Starting location of the argument. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. 
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index e93e4bdbfb7d7..d3611f2d65989 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -237,7 +237,7 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; -/// OpenMP modifiers for 'allocate' clause. +/// OpenMP modifiers for 'threadset' clause. enum OpenMPThreadsetKind { #define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, #include "clang/Basic/OpenMPKinds.def" diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 85f9c1ab47ae8..24ab245758d93 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -121,6 +121,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_nowait: case OMPC_untied: case OMPC_mergeable: + case OMPC_threadset: case OMPC_threadprivate: case OMPC_flush: case OMPC_depobj: diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index b17a3b14a5ab2..1586a4e1f24c9 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -529,6 +529,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'allocate' clause modifier"); + case OMPC_threadset: + switch (Type) { + case OMPC_THREADSET_unknown: + return "unknown"; +#define OPENMP_THREADSET_KIND(Name) \ + case OMPC_THREADSET_##Name: \ + return #Name; +#include "clang/Basic/OpenMPKinds.def" + } + llvm_unreachable("Invalid OpenMP 'threadset' clause modifier"); case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 957cc12aa773a..b9b464bc1dae2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11440,7 +11440,7 @@ void 
OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKindKwLoc(Record.readSourceLocation()); } -// Read the parameter of fail clause. This will have been saved when +// Read the parameter of threadset clause. This will have been saved when // OMPClauseWriter is called. void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 30fb7ab75cc87..5cfb32b8c1302 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -ast-dump %s | FileCheck %s --check-prefix=DUMP @@ -101,9 +103,11 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); +#ifndef OMP60 #pragma omp taskgroup task_reduction(-: argc) #pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) foo(); +#endif return 0; } @@ -199,6 +203,14 @@ int main(int argc, char **argv) { #pragma omp task depend(inout: 
omp_all_memory) foo(); // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp task threadset(omp_pool) +#pragma omp task threadset(omp_team) + foo(); +#endif + // CHECK60: #pragma omp task threadset(omp_pool) + // CHECK60: #pragma omp task threadset(omp_team) + // CHECK60-NEXT: foo(); return tmain(b, &b) + tmain(x, &x); } diff --git a/clang/test/OpenMP/task_threadset_messages.cpp b/clang/test/OpenMP/task_threadset_messages.cpp new file mode 100755 index 0000000000000..f553a2da17ab9 --- /dev/null +++ b/clang/test/OpenMP/task_threadset_messages.cpp @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +#ifdef OMP60 +struct ComplexStruct { + int data[10]; + struct InnerStruct { + float value; + } inner; +}; + +// Template class with member functions using 'threadset'. +template +class TemplateClass { +public: + void foo() { + #pragma omp task threadset(omp_pool) + { + T temp; + } + } + void bar() { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + } +}; + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in task directive. 
+void test_task_threadset_valid() { + int a; + #pragma omp task threadset(omp_pool) + #pragma omp task threadset(omp_team) + #pragma omp task threadset(omp_pool) if(1) + #pragma omp task threadset(omp_team) priority(5) + #pragma omp task threadset(omp_pool) depend(out: a) + #pragma omp parallel + { + #pragma omp task threadset(omp_pool) + { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 5; ++i) {} + } + } + + TemplateClass obj; + obj.foo(); + obj.bar(); +} + +// Invalid uses of 'threadset' with incorrect arguments in task directive. +void test_task_threadset_invalid_args() { + #pragma omp task threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + #pragma omp task threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + {} +} + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in taskloop directive. +void test_taskloop_threadset_valid() { + #pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) grainsize(5) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) num_tasks(2) + for (int i = 0; i < 10; ++i) {} +} + +// Invalid uses of 'threadset' with incorrect arguments in taskloop directive. 
+void test_taskloop_threadset_invalid_args() { + #pragma omp taskloop threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} +} + +#else +void test_threadset_not_supported() { + #pragma omp task threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp task threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp taskloop threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i 
= 0; i < 10; ++i) {} +} +#endif diff --git a/clang/test/OpenMP/taskloop_ast_print.cpp b/clang/test/OpenMP/taskloop_ast_print.cpp index 1b6d7240fa66c..e4bf20af5d78e 100644 --- a/clang/test/OpenMP/taskloop_ast_print.cpp +++ b/clang/test/OpenMP/taskloop_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // expected-no-diagnostics @@ -87,6 +89,20 @@ int main(int argc, char **argv) { // CHECK-NEXT: #pragma omp cancel taskgroup // CHECK-NEXT: #pragma omp cancellation point taskgroup // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { +#pragma omp taskloop threadset(omp_pool) + for (int j = 0; j < 10; ++j) { + foo(); + } +} +#endif + // CHECK60: #pragma omp taskloop threadset(omp_team) + // CHECK60-NEXT: for (int i = 0; i < 10; ++i) { + // CHECK60: #pragma omp taskloop threadset(omp_pool) + // CHECK60-NEXT: for (int j = 0; j < 10; ++j) { + // CHECK60-NEXT: foo(); return (tmain(argc) + tmain(argv[0][0])); } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 8c73ddc780c76..14b086d5504e8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1187,6 +1187,7 @@ def OMP_TaskLoop : Directive<"taskloop"> { VersionedClause, VersionedClause, VersionedClause, 
+ VersionedClause, ]; let allowedExclusiveClauses = [ VersionedClause, >From d700caad78a86e8acbbf92c0e01fe8378cc1d0b3 Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Sun, 20 Apr 2025 12:46:26 -0500 Subject: [PATCH 3/7] [clang] [OpenMP] Codegen support for threadset --- clang/docs/OpenMPSupport.rst | 2 +- clang/docs/ReleaseNotes.rst | 1 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 +++ clang/lib/Serialization/ASTReader.cpp | 3 ++ clang/test/OpenMP/task_ast_print.cpp | 18 ++++----- clang/test/OpenMP/task_codegen.cpp | 33 ++++++++++++++++ clang/test/OpenMP/taskloop_codegen.cpp | 53 ++++++++++++++++++++++++++ 7 files changed, 105 insertions(+), 11 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 83d90ffef6bc7..ee05a65c2aa12 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. +=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :`worked on` | :none:`unclaimed` | | +| threadset clause | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git 
a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5e8df45e71d54..f17bb3bf44a7e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -591,6 +591,7 @@ OpenMP Support - Added support 'no_openmp_constructs' assumption clause. - Added support for 'self_maps' in map and requirement clause. - Added support for 'omp stripe' directive. +- Added support for threadset clause in task and taskloop directives. Improvements ^^^^^^^^^^^^ diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5736864d4cc6b..3d51ed0088014 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3691,6 +3691,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, + PoolFlag = 0x80, }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; @@ -3700,6 +3701,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (NeedsCleanup) Flags = Flags | DestructorsFlag; } + if (const auto *Clause = D.getSingleClause()) { + OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); + if (Kind == OMPC_THREADSET_omp_pool) + Flags = Flags | PoolFlag; + } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; if (D.hasClausesOfKind()) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b9b464bc1dae2..62b86b0929133 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11050,6 +11050,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_mergeable: C = new (Context) OMPMergeableClause(); break; + case llvm::omp::OMPC_threadset: + C = new (Context) OMPThreadsetClause(); + break; case llvm::omp::OMPC_read: C = new (Context) OMPReadClause(); break; diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 5cfb32b8c1302..b059f187156ee 
100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -103,11 +103,9 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); -#ifndef OMP60 -#pragma omp taskgroup task_reduction(-: argc) -#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) +#pragma omp taskgroup task_reduction(+: argc) +#pragma omp task if (C) mergeable priority(C) in_reduction(+: argc) foo(); -#endif return 0; } @@ -123,8 +121,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(T)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -138,8 +136,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(int)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, 
e, f, g; @@ -153,8 +151,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(long)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(+: argc) // CHECK-NEXT: foo() enum Enum {}; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index c3e6d9e6b1cf7..ba8e6945de9d0 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -41,6 +41,9 @@ // RUN: -emit-llvm -o - -DOMP51 | FileCheck %s \ // RUN: --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -Wno-vla -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER @@ -65,6 +68,7 @@ struct S { S(const S &s) : a(s.a) {} ~S() {} }; + int a; int main() { char b; @@ -147,6 +151,7 @@ int main() { + // s1 = S(); @@ -215,6 +220,19 @@ void test_omp_all_memory() } } #endif // OMP51 + +#ifdef OMP60 +void test_threadset() +{ +#pragma omp task threadset(omp_team) + { + } +#pragma omp task threadset(omp_pool) + { + } +} +#endif // OMP60 + #endif // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { @@ -10243,3 
+10261,18 @@ void test_omp_all_memory() // CHECK4-51-NEXT: call void @__cxx_global_var_init() // CHECK4-51-NEXT: ret void // +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 +// CHECK6-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num, i32 1, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %0, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) +// CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index 69f8d3b160bfd..d1197607a2684 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -5,7 +5,12 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | 
FileCheck --check-prefix SIMD-ONLY0 %s + // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -241,4 +246,52 @@ void taskloop_with_class() { } } +#ifdef OMP60 +void test_threadset() +{ +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { + } +#pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) { + } +} +#endif // OMP60 +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK6-NEXT: %[[TMP:.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK6-NEXT: %[[TMP2:.*]] = alloca i32, align 4 +// CHECK6-NEXT: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID2:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID1:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID3:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID3:.*]], align 8 +// CHECK6-NEXT: %[[TID4:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID4:.*]], align 8 +// CHECK6-NEXT: 
%[[TID5:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: %[[TID6:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %[[TID6:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID7:.*]] = load i64, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 +// CHECK6-NEXT: %[[TID11:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID11:.*]], align 8 +// CHECK6-NEXT: %[[TID12:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID12:.*]], align 8 +// CHECK6-NEXT: %[[TID13:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TID13:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID14:.*]] = load i64, ptr [[TID12:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID8:.*]], i32 1, ptr %[[TID10:.*]], ptr %[[TID11:.*]], i64 %[[TID14:.*]], i32 1, 
i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: ret void + #endif >From 11deb35b539cb2a01f271d91b616252a35951dda Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Thu, 22 May 2025 09:29:08 -0500 Subject: [PATCH 4/7] Adding basic runtime support --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 ++-- clang/test/OpenMP/task_codegen.cpp | 2 +- clang/test/OpenMP/taskloop_codegen.cpp | 2 +- openmp/runtime/src/kmp.h | 6 ++++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 3d51ed0088014..99603f781a19f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3691,7 +3691,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, - PoolFlag = 0x80, + FreeAgentFlag = 0x100, }; unsigned Flags = Data.Tied ? 
TiedFlag : 0; bool NeedsCleanup = false; @@ -3704,7 +3704,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (const auto *Clause = D.getSingleClause()) { OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); if (Kind == OMPC_THREADSET_omp_pool) - Flags = Flags | PoolFlag; + Flags = Flags | FreeAgentFlag; } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index ba8e6945de9d0..0edf1fcac5b4c 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -10271,7 +10271,7 @@ void test_threadset() // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) // CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) -// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 257, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) // CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) // CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index d1197607a2684..b06e4bc9d79f6 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -279,7 +279,7 @@ void test_threadset() // CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) // CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 
%[[TID0:.*]]) // CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) -// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 257, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) // CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 // CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 // CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index d5d667c32c643..1d93b5d169fd6 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2740,7 +2740,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned tasking_ser : 1; unsigned task_serial : 1; unsigned tasktype : 1; - unsigned reserved : 8; + unsigned reserved : 7; + unsigned free_agent_eligible : 1; unsigned hidden_helper : 1; unsigned detachable : 1; unsigned priority_specified : 1; @@ -2763,7 +2764,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ setting for the task */ unsigned detachable : 1; /* 1 == can detach */ unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned reserved : 8; /* reserved for compiler use */ + unsigned free_agent_eligible : 1; /* set if task can be executed by a free-agent thread */ + unsigned reserved : 7; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ >From 57fd6ad403f70132df013f1b0ffc711205a7f8ef Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Mon, 16 Jun 2025 23:49:54 +0530 Subject: [PATCH 5/7] Removing runtime changes --- openmp/runtime/src/kmp.h | 6 
++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 1d93b5d169fd6..d5d667c32c643 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2740,8 +2740,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned tasking_ser : 1; unsigned task_serial : 1; unsigned tasktype : 1; - unsigned reserved : 7; - unsigned free_agent_eligible : 1; + unsigned reserved : 8; unsigned hidden_helper : 1; unsigned detachable : 1; unsigned priority_specified : 1; @@ -2764,8 +2763,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ setting for the task */ unsigned detachable : 1; /* 1 == can detach */ unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned free_agent_eligible : 1; /* set if task can be executed by a free-agent thread */ - unsigned reserved : 7; /* reserved for compiler use */ + unsigned reserved : 8; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ >From bd92f54ca143b4f75a0011fdca0930a7c2919371 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 17 Jun 2025 17:36:29 +0530 Subject: [PATCH 6/7] Update OpenMPSupport.rst --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index c748fa2dcf851..f98e59e15ce85 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. 
+=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/135807 | +| threadset clause | :good:`mostly` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ >From de0c3874384b971ead2933accfdd776e2e63ffd8 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 17 Jun 2025 20:12:39 +0530 Subject: [PATCH 7/7] Update OpenMPSupport.rst --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index f98e59e15ce85..79cf44ed435ef 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. 
+=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :good:`mostly` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +| threadset clause | :part:`partial` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ From openmp-commits at lists.llvm.org Tue Jul 8 08:08:06 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 08:08:06 -0700 (PDT) Subject: [Openmp-commits] [clang] [llvm] [openmp] [clang][OpenMP] New OpenMP 6.0 threadset clause (PR #135807) In-Reply-To: Message-ID: <686d3456.170a0220.1ea7c9.4990@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- clang/test/OpenMP/task_threadset_messages.cpp clang/include/clang/AST/OpenMPClause.h clang/include/clang/AST/RecursiveASTVisitor.h clang/include/clang/Basic/OpenMPKinds.h clang/include/clang/Sema/SemaOpenMP.h clang/lib/AST/OpenMPClause.cpp clang/lib/AST/StmtProfile.cpp clang/lib/Basic/OpenMPKinds.cpp clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/SemaOpenMP.cpp clang/lib/Sema/TreeTransform.h clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp clang/test/OpenMP/task_ast_print.cpp clang/test/OpenMP/task_codegen.cpp clang/test/OpenMP/taskloop_ast_print.cpp clang/test/OpenMP/taskloop_codegen.cpp clang/tools/libclang/CIndex.cpp ``````````
View the diff from clang-format here. ``````````diff diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 2264c1e0c..36dde7daa 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -267,8 +267,8 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, default: break; } - llvm_unreachable("Invalid OpenMP simple clause kind"); -} + llvm_unreachable("Invalid OpenMP simple clause kind"); + } const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, unsigned Type) { @@ -540,8 +540,8 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_THREADSET_unknown: return "unknown"; #define OPENMP_THREADSET_KIND(Name) \ - case OMPC_THREADSET_##Name: \ - return #Name; +case OMPC_THREADSET_##Name: \ + return #Name; #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'threadset' clause modifier"); ``````````
https://github.com/llvm/llvm-project/pull/135807 From openmp-commits at lists.llvm.org Tue Jul 8 08:19:44 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 08:19:44 -0700 (PDT) Subject: [Openmp-commits] [openmp] 02aacc4 - Reland: [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (#147532) Message-ID: <686d3710.050a0220.4bd7b.68a9@mx.google.com> Author: Robert Imschweiler Date: 2025-07-08T17:19:40+02:00 New Revision: 02aacc4cef65387161a91399561ea7eaf2b27e8d URL: https://github.com/llvm/llvm-project/commit/02aacc4cef65387161a91399561ea7eaf2b27e8d DIFF: https://github.com/llvm/llvm-project/commit/02aacc4cef65387161a91399561ea7eaf2b27e8d.diff LOG: Reland: [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (#147532) OpenMP 6.0 12.1.2 specifies the behavior of the strict modifier for the num_threads clause on parallel directives, along with the message and severity clauses. This commit implements necessary host runtime changes. Reland https://github.com/llvm/llvm-project/pull/146403. After manual testing on a gfx90a machine, I could not reproduce the failing test, which makes it even more likely that the test has just been flaky. (Or at least that it's not an issue related to this patch.) Added: Modified: openmp/runtime/src/kmp_runtime.cpp Removed: ################################################################################ diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..6afea9b994de4 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1214,6 +1214,12 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { // Reset for next parallel region this_thr->th.th_set_proc_bind = proc_bind_default; + // OpenMP 6.0 12.1.2 requires the num_threads 'strict' modifier to also have + // effect when parallel execution is disabled by a corresponding if clause + // attached to the parallel directive. 
+ if (this_thr->th.th_nt_strict && this_thr->th.th_set_nproc > 1) + __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev, + this_thr->th.th_nt_msg); // Reset num_threads for next parallel region this_thr->th.th_set_nproc = 0; From openmp-commits at lists.llvm.org Tue Jul 8 08:19:48 2025 From: openmp-commits at lists.llvm.org (Robert Imschweiler via Openmp-commits) Date: Tue, 08 Jul 2025 08:19:48 -0700 (PDT) Subject: [Openmp-commits] [openmp] Reland: [OpenMP][clang] 6.0: num_threads strict (part 1: host runtime) (PR #147532) In-Reply-To: Message-ID: <686d3714.170a0220.373521.7042@mx.google.com> https://github.com/ro-i closed https://github.com/llvm/llvm-project/pull/147532 From openmp-commits at lists.llvm.org Tue Jul 8 08:40:48 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 08:40:48 -0700 (PDT) Subject: [Openmp-commits] [clang] [llvm] [openmp] [clang][OpenMP] New OpenMP 6.0 threadset clause (PR #135807) In-Reply-To: Message-ID: <686d3c00.050a0220.3c98c5.6bd4@mx.google.com> ================ @@ -3700,6 +3700,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, + FreeAgentFlag = 0x100, ---------------- jprotze wrote: According to the runtime patch, this should be ```suggestion FreeAgentFlag = 0x80, ``` https://github.com/llvm/llvm-project/pull/135807 From openmp-commits at lists.llvm.org Tue Jul 8 08:59:37 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 08 Jul 2025 08:59:37 -0700 (PDT) Subject: [Openmp-commits] [clang] [llvm] [openmp] [clang][OpenMP] New OpenMP 6.0 threadset clause (PR #135807) In-Reply-To: Message-ID: <686d4069.050a0220.3c172d.7591@mx.google.com> https://github.com/Ritanya-B-Bharadwaj updated https://github.com/llvm/llvm-project/pull/135807 >From 9c56e59ba9984c14c15a8d5a95a02e7192a64e8f Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Sun, 6 Apr 2025 09:33:06 -0500 Subject: 
[PATCH 1/8] [OpenMP] Parsing Support of ThreadSets in Task --- clang/include/clang/AST/OpenMPClause.h | 80 +++++++++++++++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 6 ++ clang/include/clang/Basic/OpenMPKinds.def | 8 +- clang/include/clang/Basic/OpenMPKinds.h | 7 ++ clang/include/clang/Sema/SemaOpenMP.h | 6 ++ clang/lib/AST/OpenMPClause.cpp | 7 ++ clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 9 +++ clang/lib/Parse/ParseOpenMP.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 21 +++++ clang/lib/Sema/TreeTransform.h | 7 ++ clang/lib/Serialization/ASTReader.cpp | 11 +++ clang/lib/Serialization/ASTWriter.cpp | 6 ++ clang/tools/libclang/CIndex.cpp | 2 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 + 15 files changed, 176 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 572e62249b46f..81420384f885c 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1332,6 +1332,86 @@ class OMPDefaultClause : public OMPClause { } }; +/// This represents 'threadset' clause in the '#pragma omp ...' directive. +/// +/// \code +/// #pragma omp task threadset(omp_pool) +/// \endcode +/// In this example directive '#pragma omp task' has simple 'threadset' +/// clause with kind 'omp_pool'. +class OMPThreadsetClause : public OMPClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// A kind of the 'threadset' clause. + OpenMPThreadsetKind Kind = OMPC_THREADSET_unknown; + + /// Start location of the kind in source code. + SourceLocation KindLoc; + + /// Set kind of the clause. + /// + /// \param K Argument of clause. + void setThreadsetKind(OpenMPThreadsetKind K) { Kind = K; } + + /// Set argument location. + /// + /// \param KLoc Argument location. + void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } + +public: + /// Build 'threadset' clause with argument \a A ('omp_pool' or 'omp_team').
+ /// + /// \param A Argument of the clause ('omp_pool' or 'omp_team'). + /// \param ALoc Starting location of the argument. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPThreadsetClause(OpenMPThreadsetKind A, SourceLocation ALoc, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_threadset, StartLoc, EndLoc), + LParenLoc(LParenLoc), Kind(A), KindLoc(ALoc) {} + + /// Build an empty clause. + OMPThreadsetClause() + : OMPClause(llvm::omp::OMPC_threadset, SourceLocation(), + SourceLocation()) {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns kind of the clause. + OpenMPThreadsetKind getThreadsetKind() const { return Kind; } + + /// Returns location of clause kind. + SourceLocation getThreadsetKindLoc() const { return KindLoc; } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_threadset; + } +}; + /// This represents 'proc_bind' clause in the '#pragma omp ...' /// directive.
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 0530996ed20d3..d86c7d4577ac6 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3410,6 +3410,12 @@ bool RecursiveASTVisitor::VisitOMPDefaultClause(OMPDefaultClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPThreadsetClause( + OMPThreadsetClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPProcBindClause(OMPProcBindClause *) { return true; diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index b0de65df7e397..5b8889b8f7a34 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -92,6 +92,9 @@ #ifndef OPENMP_ALLOCATE_MODIFIER #define OPENMP_ALLOCATE_MODIFIER(Name) #endif +#ifndef OPENMP_THREADSET_KIND +#define OPENMP_THREADSET_KIND(Name) +#endif // Static attributes for 'schedule' clause. OPENMP_SCHEDULE_KIND(static) @@ -236,6 +239,9 @@ OPENMP_DOACROSS_MODIFIER(sink) OPENMP_DOACROSS_MODIFIER(sink_omp_cur_iteration) OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) +OPENMP_THREADSET_KIND(omp_pool) +OPENMP_THREADSET_KIND(omp_team) + #undef OPENMP_NUMTASKS_MODIFIER #undef OPENMP_GRAINSIZE_MODIFIER #undef OPENMP_BIND_KIND @@ -263,4 +269,4 @@ OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) #undef OPENMP_DEFAULTMAP_MODIFIER #undef OPENMP_DOACROSS_MODIFIER #undef OPENMP_ALLOCATE_MODIFIER - +#undef OPENMP_THREADSET_KIND diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6ca9f9c550285..e93e4bdbfb7d7 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -237,6 +237,13 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; +/// OpenMP kinds for 'threadset' clause.
+enum OpenMPThreadsetKind { +#define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_THREADSET_unknown +}; + /// Number of allowed allocate-modifiers. static constexpr unsigned NumberOfOMPAllocateClauseModifiers = OMPC_ALLOCATE_unknown; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 6498390fe96f7..d6a0167177f12 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -955,6 +955,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'threadset' clause. + OMPClause *ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'proc_bind' clause. OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, SourceLocation KindLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 2226791a70b6e..85f9c1ab47ae8 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1913,6 +1913,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) { << ")"; } +void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) { + OS << "threadset(" + << getOpenMPSimpleClauseTypeName(OMPC_threadset, + unsigned(Node->getThreadsetKind())) + << ")"; +} + void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) { OS << "proc_bind(" << getOpenMPSimpleClauseTypeName(OMPC_proc_bind, diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 83d54da9be7e5..5b18d1bf4019d 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -540,6 +540,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) { void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) 
{ } +void OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {} + void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { } void OMPClauseProfiler::VisitOMPUnifiedAddressClause( diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 09921e3b1edfc..b17a3b14a5ab2 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -185,6 +185,15 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, #define OPENMP_ALLOCATE_MODIFIER(Name) .Case(#Name, OMPC_ALLOCATE_##Name) #include "clang/Basic/OpenMPKinds.def" .Default(OMPC_ALLOCATE_unknown); + case OMPC_threadset: { + unsigned Type = llvm::StringSwitch(Str) +#define OPENMP_THREADSET_KIND(Name) .Case(#Name, OMPC_THREADSET_##Name) +#include "clang/Basic/OpenMPKinds.def" + .Default(OMPC_THREADSET_unknown); + if (LangOpts.OpenMP < 60) + return OMPC_THREADSET_unknown; + return Type; + } case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index b0e6c2f07a1e7..610089affde47 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3266,6 +3266,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, else Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); break; + case OMPC_threadset: case OMPC_fail: case OMPC_default: case OMPC_proc_bind: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a382947455aef..2d57a9b54c02f 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16129,6 +16129,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( static_cast(Argument), ArgumentLoc, StartLoc, LParenLoc, EndLoc); break; + case OMPC_threadset: + Res = ActOnOpenMPThreadsetClause(static_cast(Argument), + ArgumentLoc, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: @@ -16266,6 +16270,23 
@@ OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(DefaultKind Kind, OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } +OMPClause *SemaOpenMP::ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + if (Kind == OMPC_THREADSET_unknown) { + Diag(KindLoc, diag::err_omp_unexpected_clause_value) + << getListOfPossibleValues(OMPC_threadset, /*First=*/0, + /*Last=*/unsigned(OMPC_THREADSET_unknown)) + << getOpenMPClauseName(OMPC_threadset); + return nullptr; + } + + return new (getASTContext()) + OMPThreadsetClause(Kind, KindLoc, StartLoc, LParenLoc, EndLoc); +} + OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind, SourceLocation KindKwLoc, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3689d323cf25b..5aca6c40308bc 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10539,6 +10539,13 @@ TreeTransform::TransformOMPDefaultClause(OMPDefaultClause *C) { C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPThreadsetClause(OMPThreadsetClause *C) { + // No need to rebuild this clause, no template-dependent parameters. + return C; +} + template OMPClause * TreeTransform::TransformOMPProcBindClause(OMPProcBindClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 8e573a11efd35..957cc12aa773a 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11440,6 +11440,17 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKindKwLoc(Record.readSourceLocation()); } +// Read the parameter of fail clause. This will have been saved when +// OMPClauseWriter is called. 
+void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + C->setLParenLoc(Record.readSourceLocation()); + SourceLocation ThreadsetKindLoc = Record.readSourceLocation(); + C->setThreadsetKindLoc(ThreadsetKindLoc); + OpenMPThreadsetKind TKind = + static_cast<OpenMPThreadsetKind>(Record.readInt()); + C->setThreadsetKind(TKind); +} + void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) { C->setProcBindKind(static_cast(Record.readInt())); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 84f7f2bc5fce4..2818748e38183 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7785,6 +7785,12 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) { Record.AddSourceLocation(C->getDefaultKindKwLoc()); } +void OMPClauseWriter::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getThreadsetKindLoc()); + Record.writeEnum(C->getThreadsetKind()); +} + void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) { Record.push_back(unsigned(C->getProcBindKind())); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 6ea6447d1d590..fc96f86df8108 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2454,6 +2454,8 @@ void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} void OMPClauseEnqueue::VisitOMPFailClause(const OMPFailClause *) {} +void OMPClauseEnqueue::VisitOMPThreadsetClause(const OMPThreadsetClause *) {} + void OMPClauseEnqueue::VisitOMPAbsentClause(const OMPAbsentClause *) {} void OMPClauseEnqueue::VisitOMPHoldsClause(const OMPHoldsClause *) {} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index e2a1449d8cc76..8c73ddc780c76 100644 --- 
a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -497,6 +497,9 @@ def OMPC_ThreadPrivate : Clause<"threadprivate"> { def OMPC_Threads : Clause<"threads"> { let clangClass = "OMPThreadsClause"; } +def OMPC_Threadset : Clause<"threadset"> { + let clangClass = "OMPThreadsetClause"; +} def OMPC_To : Clause<"to"> { let clangClass = "OMPToClause"; let flangClass = "OmpToClause"; @@ -1152,6 +1155,7 @@ def OMP_Task : Directive<"task"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; >From bcc7c388a929e49ba6805f0038a3b7cdaa475fee Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Tue, 15 Apr 2025 11:13:40 -0500 Subject: [PATCH 2/8] [clang][OpenMP] New OpenMP 6.0 threadset clause --- clang/include/clang/AST/OpenMPClause.h | 12 +-- clang/include/clang/Basic/OpenMPKinds.h | 2 +- clang/lib/AST/OpenMPClause.cpp | 1 + clang/lib/Basic/OpenMPKinds.cpp | 10 ++ clang/lib/Serialization/ASTReader.cpp | 2 +- clang/test/OpenMP/task_ast_print.cpp | 12 +++ clang/test/OpenMP/task_threadset_messages.cpp | 99 +++++++++++++++++++ clang/test/OpenMP/taskloop_ast_print.cpp | 16 +++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 9 files changed, 147 insertions(+), 8 deletions(-) create mode 100755 clang/test/OpenMP/task_threadset_messages.cpp diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 81420384f885c..aeaf5c292b1be 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1332,13 +1332,13 @@ class OMPDefaultClause : public OMPClause { } }; -/// This represents 'threadset' clause in the '#pragma omp ...' directive. +/// This represents 'threadset' clause in the '#pragma omp task ...' directive. 
/// /// \code -/// #pragma omp parallel threadset(shared) +/// #pragma omp task threadset(omp_pool) /// \endcode -/// In this example directive '#pragma omp parallel' has simple 'threadset' -/// clause with kind 'shared'. +/// In this example directive '#pragma omp task' has simple 'threadset' +/// clause with kind 'omp_pool'. class OMPThreadsetClause : public OMPClause { friend class OMPClauseReader; @@ -1362,9 +1362,9 @@ class OMPThreadsetClause : public OMPClause { void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } public: - /// Build 'threadset' clause with argument \a A ('none' or 'shared'). + /// Build 'threadset' clause with argument \a A ('omp_team' or 'omp_pool'). /// - /// \param A Argument of the clause ('none' or 'shared'). + /// \param A Argument of the clause ('omp_team' or 'omp_pool'). /// \param ALoc Starting location of the argument. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index e93e4bdbfb7d7..d3611f2d65989 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -237,7 +237,7 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; -/// OpenMP modifiers for 'allocate' clause. +/// OpenMP modifiers for 'threadset' clause. 
enum OpenMPThreadsetKind { #define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, #include "clang/Basic/OpenMPKinds.def" diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 85f9c1ab47ae8..24ab245758d93 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -121,6 +121,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_nowait: case OMPC_untied: case OMPC_mergeable: + case OMPC_threadset: case OMPC_threadprivate: case OMPC_flush: case OMPC_depobj: diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index b17a3b14a5ab2..1586a4e1f24c9 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -529,6 +529,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'allocate' clause modifier"); + case OMPC_threadset: + switch (Type) { + case OMPC_THREADSET_unknown: + return "unknown"; +#define OPENMP_THREADSET_KIND(Name) \ + case OMPC_THREADSET_##Name: \ + return #Name; +#include "clang/Basic/OpenMPKinds.def" + } + llvm_unreachable("Invalid OpenMP 'threadset' clause modifier"); case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 957cc12aa773a..b9b464bc1dae2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11440,7 +11440,7 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKindKwLoc(Record.readSourceLocation()); } -// Read the parameter of fail clause. This will have been saved when +// Read the parameter of threadset clause. This will have been saved when // OMPClauseWriter is called. 
void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 30fb7ab75cc87..5cfb32b8c1302 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -ast-dump %s | FileCheck %s --check-prefix=DUMP @@ -101,9 +103,11 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); +#ifndef OMP60 #pragma omp taskgroup task_reduction(-: argc) #pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) foo(); +#endif return 0; } @@ -199,6 +203,14 @@ int main(int argc, char **argv) { #pragma omp task depend(inout: omp_all_memory) foo(); // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp task threadset(omp_pool) +#pragma omp task threadset(omp_team) + foo(); +#endif + // CHECK60: #pragma omp task threadset(omp_pool) + // CHECK60: #pragma omp task threadset(omp_team) + // CHECK60-NEXT: foo(); return tmain(b, &b) + 
tmain(x, &x); } diff --git a/clang/test/OpenMP/task_threadset_messages.cpp b/clang/test/OpenMP/task_threadset_messages.cpp new file mode 100755 index 0000000000000..f553a2da17ab9 --- /dev/null +++ b/clang/test/OpenMP/task_threadset_messages.cpp @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +#ifdef OMP60 +struct ComplexStruct { + int data[10]; + struct InnerStruct { + float value; + } inner; +}; + +// Template class with member functions using 'threadset'. +template +class TemplateClass { +public: + void foo() { + #pragma omp task threadset(omp_pool) + { + T temp; + } + } + void bar() { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + } +}; + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in task directive. 
+void test_task_threadset_valid() { + int a; + #pragma omp task threadset(omp_pool) + #pragma omp task threadset(omp_team) + #pragma omp task threadset(omp_pool) if(1) + #pragma omp task threadset(omp_team) priority(5) + #pragma omp task threadset(omp_pool) depend(out: a) + #pragma omp parallel + { + #pragma omp task threadset(omp_pool) + { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 5; ++i) {} + } + } + + TemplateClass obj; + obj.foo(); + obj.bar(); +} + +// Invalid uses of 'threadset' with incorrect arguments in task directive. +void test_task_threadset_invalid_args() { + #pragma omp task threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + #pragma omp task threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + {} +} + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in taskloop directive. +void test_taskloop_threadset_valid() { + #pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) grainsize(5) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) num_tasks(2) + for (int i = 0; i < 10; ++i) {} +} + +// Invalid uses of 'threadset' with incorrect arguments in taskloop directive. 
+void test_taskloop_threadset_invalid_args() { + #pragma omp taskloop threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} +} + +#else +void test_threadset_not_supported() { + #pragma omp task threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp task threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp taskloop threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i 
= 0; i < 10; ++i) {} +} +#endif diff --git a/clang/test/OpenMP/taskloop_ast_print.cpp b/clang/test/OpenMP/taskloop_ast_print.cpp index 1b6d7240fa66c..e4bf20af5d78e 100644 --- a/clang/test/OpenMP/taskloop_ast_print.cpp +++ b/clang/test/OpenMP/taskloop_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // expected-no-diagnostics @@ -87,6 +89,20 @@ int main(int argc, char **argv) { // CHECK-NEXT: #pragma omp cancel taskgroup // CHECK-NEXT: #pragma omp cancellation point taskgroup // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { +#pragma omp taskloop threadset(omp_pool) + for (int j = 0; j < 10; ++j) { + foo(); + } +} +#endif + // CHECK60: #pragma omp taskloop threadset(omp_team) + // CHECK60-NEXT: for (int i = 0; i < 10; ++i) { + // CHECK60: #pragma omp taskloop threadset(omp_pool) + // CHECK60-NEXT: for (int j = 0; j < 10; ++j) { + // CHECK60-NEXT: foo(); return (tmain(argc) + tmain(argv[0][0])); } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 8c73ddc780c76..14b086d5504e8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1187,6 +1187,7 @@ def OMP_TaskLoop : Directive<"taskloop"> { VersionedClause, VersionedClause, VersionedClause, 
+ VersionedClause, ]; let allowedExclusiveClauses = [ VersionedClause, >From d700caad78a86e8acbbf92c0e01fe8378cc1d0b3 Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Sun, 20 Apr 2025 12:46:26 -0500 Subject: [PATCH 3/8] [clang] [OpenMP] Codegen support for threadset --- clang/docs/OpenMPSupport.rst | 2 +- clang/docs/ReleaseNotes.rst | 1 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 +++ clang/lib/Serialization/ASTReader.cpp | 3 ++ clang/test/OpenMP/task_ast_print.cpp | 18 ++++----- clang/test/OpenMP/task_codegen.cpp | 33 ++++++++++++++++ clang/test/OpenMP/taskloop_codegen.cpp | 53 ++++++++++++++++++++++++++ 7 files changed, 105 insertions(+), 11 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 83d90ffef6bc7..ee05a65c2aa12 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. +=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :`worked on` | :none:`unclaimed` | | +| threadset clause | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git 
a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5e8df45e71d54..f17bb3bf44a7e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -591,6 +591,7 @@ OpenMP Support - Added support 'no_openmp_constructs' assumption clause. - Added support for 'self_maps' in map and requirement clause. - Added support for 'omp stripe' directive. +- Added support for threadset clause in task and taskloop directives. Improvements ^^^^^^^^^^^^ diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5736864d4cc6b..3d51ed0088014 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3691,6 +3691,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, + PoolFlag = 0x80, }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; @@ -3700,6 +3701,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (NeedsCleanup) Flags = Flags | DestructorsFlag; } + if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) { + OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); + if (Kind == OMPC_THREADSET_omp_pool) + Flags = Flags | PoolFlag; + } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; if (D.hasClausesOfKind()) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b9b464bc1dae2..62b86b0929133 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11050,6 +11050,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_mergeable: C = new (Context) OMPMergeableClause(); break; + case llvm::omp::OMPC_threadset: + C = new (Context) OMPThreadsetClause(); + break; case llvm::omp::OMPC_read: C = new (Context) OMPReadClause(); break; diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 5cfb32b8c1302..b059f187156ee 
100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -103,11 +103,9 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); -#ifndef OMP60 -#pragma omp taskgroup task_reduction(-: argc) -#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) +#pragma omp taskgroup task_reduction(+: argc) +#pragma omp task if (C) mergeable priority(C) in_reduction(+: argc) foo(); -#endif return 0; } @@ -123,8 +121,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(T)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -138,8 +136,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(int)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, 
e, f, g; @@ -153,8 +151,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(long)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(+: argc) // CHECK-NEXT: foo() enum Enum {}; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index c3e6d9e6b1cf7..ba8e6945de9d0 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -41,6 +41,9 @@ // RUN: -emit-llvm -o - -DOMP51 | FileCheck %s \ // RUN: --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -Wno-vla -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER @@ -65,6 +68,7 @@ struct S { S(const S &s) : a(s.a) {} ~S() {} }; + int a; int main() { char b; @@ -147,6 +151,7 @@ int main() { + // s1 = S(); @@ -215,6 +220,19 @@ void test_omp_all_memory() } } #endif // OMP51 + +#ifdef OMP60 +void test_threadset() +{ +#pragma omp task threadset(omp_team) + { + } +#pragma omp task threadset(omp_pool) + { + } +} +#endif // OMP60 + #endif // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { @@ -10243,3 
+10261,18 @@ void test_omp_all_memory() // CHECK4-51-NEXT: call void @__cxx_global_var_init() // CHECK4-51-NEXT: ret void // +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 +// CHECK6-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num, i32 1, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %0, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) +// CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index 69f8d3b160bfd..d1197607a2684 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -5,7 +5,12 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | 
FileCheck --check-prefix SIMD-ONLY0 %s + // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -241,4 +246,52 @@ void taskloop_with_class() { } } +#ifdef OMP60 +void test_threadset() +{ +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { + } +#pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) { + } +} +#endif // OMP60 +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK6-NEXT: %[[TMP:.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK6-NEXT: %[[TMP2:.*]] = alloca i32, align 4 +// CHECK6-NEXT: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID2:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID1:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID3:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID3:.*]], align 8 +// CHECK6-NEXT: %[[TID4:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID4:.*]], align 8 +// CHECK6-NEXT: 
%[[TID5:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: %[[TID6:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %[[TID6:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID7:.*]] = load i64, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 +// CHECK6-NEXT: %[[TID11:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID11:.*]], align 8 +// CHECK6-NEXT: %[[TID12:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID12:.*]], align 8 +// CHECK6-NEXT: %[[TID13:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TID13:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID14:.*]] = load i64, ptr [[TID12:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID8:.*]], i32 1, ptr %[[TID10:.*]], ptr %[[TID11:.*]], i64 %[[TID14:.*]], i32 1, 
i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: ret void + #endif >From 11deb35b539cb2a01f271d91b616252a35951dda Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Thu, 22 May 2025 09:29:08 -0500 Subject: [PATCH 4/8] Adding basic runtime support --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 ++-- clang/test/OpenMP/task_codegen.cpp | 2 +- clang/test/OpenMP/taskloop_codegen.cpp | 2 +- openmp/runtime/src/kmp.h | 6 ++++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 3d51ed0088014..99603f781a19f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3691,7 +3691,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, - PoolFlag = 0x80, + FreeAgentFlag = 0x100, }; unsigned Flags = Data.Tied ? 
TiedFlag : 0; bool NeedsCleanup = false; @@ -3704,7 +3704,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (const auto *Clause = D.getSingleClause()) { OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); if (Kind == OMPC_THREADSET_omp_pool) - Flags = Flags | PoolFlag; + Flags = Flags | FreeAgentFlag; } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index ba8e6945de9d0..0edf1fcac5b4c 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -10271,7 +10271,7 @@ void test_threadset() // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) // CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) -// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 257, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) // CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) // CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index d1197607a2684..b06e4bc9d79f6 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -279,7 +279,7 @@ void test_threadset() // CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) // CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 
%[[TID0:.*]]) // CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) -// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 257, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) // CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 // CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 // CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index d5d667c32c643..1d93b5d169fd6 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2740,7 +2740,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned tasking_ser : 1; unsigned task_serial : 1; unsigned tasktype : 1; - unsigned reserved : 8; + unsigned reserved : 7; + unsigned free_agent_eligible : 1; unsigned hidden_helper : 1; unsigned detachable : 1; unsigned priority_specified : 1; @@ -2763,7 +2764,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ setting for the task */ unsigned detachable : 1; /* 1 == can detach */ unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned reserved : 8; /* reserved for compiler use */ + unsigned free_agent_eligible : 1; /* set if task can be executed by a free-agent thread */ + unsigned reserved : 7; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ >From 57fd6ad403f70132df013f1b0ffc711205a7f8ef Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Mon, 16 Jun 2025 23:49:54 +0530 Subject: [PATCH 5/8] Removing runtime changes --- openmp/runtime/src/kmp.h | 6 
++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 1d93b5d169fd6..d5d667c32c643 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2740,8 +2740,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned tasking_ser : 1; unsigned task_serial : 1; unsigned tasktype : 1; - unsigned reserved : 7; - unsigned free_agent_eligible : 1; + unsigned reserved : 8; unsigned hidden_helper : 1; unsigned detachable : 1; unsigned priority_specified : 1; @@ -2764,8 +2763,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ setting for the task */ unsigned detachable : 1; /* 1 == can detach */ unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned free_agent_eligible : 1; /* set if task can be executed by a free-agent thread */ - unsigned reserved : 7; /* reserved for compiler use */ + unsigned reserved : 8; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ >From bd92f54ca143b4f75a0011fdca0930a7c2919371 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 17 Jun 2025 17:36:29 +0530 Subject: [PATCH 6/8] Update OpenMPSupport.rst --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index c748fa2dcf851..f98e59e15ce85 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. 
+=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/135807 | +| threadset clause | :good:`mostly` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ >From de0c3874384b971ead2933accfdd776e2e63ffd8 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 17 Jun 2025 20:12:39 +0530 Subject: [PATCH 7/8] Update OpenMPSupport.rst --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index f98e59e15ce85..79cf44ed435ef 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. 
+=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :good:`mostly` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +| threadset clause | :part:`partial` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ >From 3aefe9718589b0de181917c744c0ab40b98218e5 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 8 Jul 2025 21:29:24 +0530 Subject: [PATCH 8/8] Update clang/lib/CodeGen/CGOpenMPRuntime.cpp Co-authored-by: Joachim --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a6bf0336680d1..67e771258475b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3700,7 +3700,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, - FreeAgentFlag = 0x100, + FreeAgentFlag = 0x80, }; unsigned Flags = Data.Tied ? 
TiedFlag : 0; bool NeedsCleanup = false; From openmp-commits at lists.llvm.org Tue Jul 8 09:14:17 2025 From: openmp-commits at lists.llvm.org (Shilei Tian via Openmp-commits) Date: Tue, 08 Jul 2025 09:14:17 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <686d43d9.170a0220.279776.61df@mx.google.com> shiltian wrote: > We need the stub implementation for consistency between compiler and runtime bits Just for the record, correct me if I'm wrong, but I don't think this is necessary for consistency between the compiler and runtime, because the bit isn't set by the compiler. I'm not comfortable merging a non-NFC change without any test case. On the other hand, even if it is set by the front end, the runtime changes can always go before the compiler change with a proper test case. The test case itself can simply manually construct the necessary function call that the compiler is expected to emit. https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Tue Jul 8 10:48:07 2025 From: openmp-commits at lists.llvm.org (Shilei Tian via Openmp-commits) Date: Tue, 08 Jul 2025 10:48:07 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] OpenMP ThreadSet clause - basic runtime (PR #144409) In-Reply-To: Message-ID: <686d59d7.170a0220.21f244.e188@mx.google.com> shiltian wrote: Then check this part of my previous comment: > even if it is set by the front end, the runtime changes can always go before the compiler change with a proper test case. The test case itself can simply manually construct the necessary function call that the compiler is expected to emit. We already have a bunch of tests in `libomp` that "emulates" code generated by the compiler. 
https://github.com/llvm/llvm-project/pull/144409 From openmp-commits at lists.llvm.org Thu Jul 10 10:22:07 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 10 Jul 2025 10:22:07 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <686ff6bf.170a0220.1b0533.9e60@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 1/4] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * Implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup. 
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 2/4] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 3/4] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 4/4] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } From openmp-commits at lists.llvm.org Thu Jul 10 10:31:30 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 10 Jul 2025 10:31:30 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <686ff8f2.170a0220.3bc0df.a311@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 1/5] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * It implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend-related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup. 
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 2/5] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 3/5] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 4/5] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 5/5] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } From openmp-commits at lists.llvm.org Thu Jul 10 12:59:21 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 10 Jul 2025 12:59:21 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <68701b99.170a0220.1c34d0.ac72@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 1/7] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * It implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend-related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports an extension to define interop callbacks for library cleanup. 
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 2/7] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 3/7] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 4/7] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 5/7] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } >From fd69d49e8509161925d03015e5706c36a47b64b2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:17:47 +0200 Subject: [PATCH 6/7] remove unnecessary conditions --- offload/libomptarget/OpenMP/InteropAPI.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 57be23f10d24d..fa6325333c606 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -286,9 +286,6 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { @@ -322,9 +319,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) DP("Warning: nowait flag on interop destroy not supported " @@ -348,11 +342,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - if (!Interop) { - DP("Call to %s with invalid interop\n", __func__); - return omp_irc_empty; - } - Interop->addCompletionCb(cb, data); return omp_irc_success; >From fbca38468cd004afb311d5066abcc3c5ca96392e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:52:30 +0200 Subject: [PATCH 7/7] another corner case when unloading --- offload/libomptarget/PluginManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 2cc1314e7a4f0..f5d913f2b8909 100644 --- 
a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -132,7 +132,8 @@ void PluginManager::initializeAllDevices() { std::atexit([]() { // Interop cleanup should be done before the plugins are deinitialized as // the backend libraries may be already unloaded. - PM->InteropTbl.clear(); + if (PM) + PM->InteropTbl.clear(); }); } From openmp-commits at lists.llvm.org Mon Jul 14 02:53:34 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 02:53:34 -0700 (PDT) Subject: [Openmp-commits] [clang] [llvm] [openmp] [clang][OpenMP] New OpenMP 6.0 threadset clause (PR #135807) In-Reply-To: Message-ID: <6874d39e.050a0220.354f55.1425@mx.google.com> https://github.com/Ritanya-B-Bharadwaj updated https://github.com/llvm/llvm-project/pull/135807 >From 9c56e59ba9984c14c15a8d5a95a02e7192a64e8f Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Sun, 6 Apr 2025 09:33:06 -0500 Subject: [PATCH 1/9] [OpenMP] Parsing Support of ThreadSets in Task --- clang/include/clang/AST/OpenMPClause.h | 80 +++++++++++++++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 6 ++ clang/include/clang/Basic/OpenMPKinds.def | 8 +- clang/include/clang/Basic/OpenMPKinds.h | 7 ++ clang/include/clang/Sema/SemaOpenMP.h | 6 ++ clang/lib/AST/OpenMPClause.cpp | 7 ++ clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 9 +++ clang/lib/Parse/ParseOpenMP.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 21 +++++ clang/lib/Sema/TreeTransform.h | 7 ++ clang/lib/Serialization/ASTReader.cpp | 11 +++ clang/lib/Serialization/ASTWriter.cpp | 6 ++ clang/tools/libclang/CIndex.cpp | 2 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 + 15 files changed, 176 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 572e62249b46f..81420384f885c 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1332,6 +1332,86 @@ class 
OMPDefaultClause : public OMPClause { } }; +/// This represents 'threadset' clause in the '#pragma omp ...' directive. +/// +/// \code +/// #pragma omp parallel threadset(shared) +/// \endcode +/// In this example directive '#pragma omp parallel' has simple 'threadset' +/// clause with kind 'shared'. +class OMPThreadsetClause : public OMPClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// A kind of the 'threadset' clause. + OpenMPThreadsetKind Kind = OMPC_THREADSET_unknown; + + /// Start location of the kind in source code. + SourceLocation KindLoc; + + /// Set kind of the clauses. + /// + /// \param K Argument of clause. + void setThreadsetKind(OpenMPThreadsetKind K) { Kind = K; } + + /// Set argument location. + /// + /// \param KLoc Argument location. + void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } + +public: + /// Build 'threadset' clause with argument \a A ('none' or 'shared'). + /// + /// \param A Argument of the clause ('none' or 'shared'). + /// \param ALoc Starting location of the argument. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPThreadsetClause(OpenMPThreadsetKind A, SourceLocation ALoc, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_threadset, StartLoc, EndLoc), + LParenLoc(LParenLoc), Kind(A), KindLoc(ALoc) {} + + /// Build an empty clause. + OMPThreadsetClause() + : OMPClause(llvm::omp::OMPC_threadset, SourceLocation(), + SourceLocation()) {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns kind of the clause. + OpenMPThreadsetKind getThreadsetKind() const { return Kind; } + + /// Returns location of clause kind. 
+ SourceLocation getThreadsetKindLoc() const { return KindLoc; } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_threadset; + } +}; + /// This represents 'proc_bind' clause in the '#pragma omp ...' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 0530996ed20d3..d86c7d4577ac6 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3410,6 +3410,12 @@ bool RecursiveASTVisitor::VisitOMPDefaultClause(OMPDefaultClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPThreadsetClause( + OMPThreadsetClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPProcBindClause(OMPProcBindClause *) { return true; diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index b0de65df7e397..5b8889b8f7a34 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -92,6 +92,9 @@ #ifndef OPENMP_ALLOCATE_MODIFIER #define OPENMP_ALLOCATE_MODIFIER(Name) #endif +#ifndef OPENMP_THREADSET_KIND +#define OPENMP_THREADSET_KIND(Name) +#endif // Static attributes for 'schedule' clause. 
OPENMP_SCHEDULE_KIND(static) @@ -236,6 +239,9 @@ OPENMP_DOACROSS_MODIFIER(sink) OPENMP_DOACROSS_MODIFIER(sink_omp_cur_iteration) OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) +OPENMP_THREADSET_KIND(omp_pool) +OPENMP_THREADSET_KIND(omp_team) + #undef OPENMP_NUMTASKS_MODIFIER #undef OPENMP_GRAINSIZE_MODIFIER #undef OPENMP_BIND_KIND @@ -263,4 +269,4 @@ OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration) #undef OPENMP_DEFAULTMAP_MODIFIER #undef OPENMP_DOACROSS_MODIFIER #undef OPENMP_ALLOCATE_MODIFIER - +#undef OPENMP_THREADSET_KIND diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 6ca9f9c550285..e93e4bdbfb7d7 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -237,6 +237,13 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; +/// OpenMP modifiers for 'allocate' clause. +enum OpenMPThreadsetKind { +#define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_THREADSET_unknown +}; + /// Number of allowed allocate-modifiers. static constexpr unsigned NumberOfOMPAllocateClauseModifiers = OMPC_ALLOCATE_unknown; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 6498390fe96f7..d6a0167177f12 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -955,6 +955,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + /// Called on well-formed 'threadset' clause. + OMPClause *ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// Called on well-formed 'proc_bind' clause. 
OMPClause *ActOnOpenMPProcBindClause(llvm::omp::ProcBindKind Kind, SourceLocation KindLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 2226791a70b6e..85f9c1ab47ae8 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1913,6 +1913,13 @@ void OMPClausePrinter::VisitOMPDefaultClause(OMPDefaultClause *Node) { << ")"; } +void OMPClausePrinter::VisitOMPThreadsetClause(OMPThreadsetClause *Node) { + OS << "threadset(" + << getOpenMPSimpleClauseTypeName(OMPC_threadset, + unsigned(Node->getThreadsetKind())) + << ")"; +} + void OMPClausePrinter::VisitOMPProcBindClause(OMPProcBindClause *Node) { OS << "proc_bind(" << getOpenMPSimpleClauseTypeName(OMPC_proc_bind, diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 83d54da9be7e5..5b18d1bf4019d 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -540,6 +540,8 @@ void OMPClauseProfiler::VisitOMPNocontextClause(const OMPNocontextClause *C) { void OMPClauseProfiler::VisitOMPDefaultClause(const OMPDefaultClause *C) { } +void OMPClauseProfiler::VisitOMPThreadsetClause(const OMPThreadsetClause *C) {} + void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { } void OMPClauseProfiler::VisitOMPUnifiedAddressClause( diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 09921e3b1edfc..b17a3b14a5ab2 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -185,6 +185,15 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, #define OPENMP_ALLOCATE_MODIFIER(Name) .Case(#Name, OMPC_ALLOCATE_##Name) #include "clang/Basic/OpenMPKinds.def" .Default(OMPC_ALLOCATE_unknown); + case OMPC_threadset: { + unsigned Type = llvm::StringSwitch(Str) +#define OPENMP_THREADSET_KIND(Name) .Case(#Name, OMPC_THREADSET_##Name) +#include "clang/Basic/OpenMPKinds.def" + .Default(OMPC_THREADSET_unknown); + if (LangOpts.OpenMP < 60) 
+ return OMPC_THREADSET_unknown; + return Type; + } case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index b0e6c2f07a1e7..610089affde47 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3266,6 +3266,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, else Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); break; + case OMPC_threadset: case OMPC_fail: case OMPC_default: case OMPC_proc_bind: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a382947455aef..2d57a9b54c02f 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16129,6 +16129,10 @@ OMPClause *SemaOpenMP::ActOnOpenMPSimpleClause( static_cast(Argument), ArgumentLoc, StartLoc, LParenLoc, EndLoc); break; + case OMPC_threadset: + Res = ActOnOpenMPThreadsetClause(static_cast(Argument), + ArgumentLoc, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: @@ -16266,6 +16270,23 @@ OMPClause *SemaOpenMP::ActOnOpenMPDefaultClause(DefaultKind Kind, OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc); } +OMPClause *SemaOpenMP::ActOnOpenMPThreadsetClause(OpenMPThreadsetKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + if (Kind == OMPC_THREADSET_unknown) { + Diag(KindLoc, diag::err_omp_unexpected_clause_value) + << getListOfPossibleValues(OMPC_threadset, /*First=*/0, + /*Last=*/unsigned(OMPC_THREADSET_unknown)) + << getOpenMPClauseName(OMPC_threadset); + return nullptr; + } + + return new (getASTContext()) + OMPThreadsetClause(Kind, KindLoc, StartLoc, LParenLoc, EndLoc); +} + OMPClause *SemaOpenMP::ActOnOpenMPProcBindClause(ProcBindKind Kind, SourceLocation KindKwLoc, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 
3689d323cf25b..5aca6c40308bc 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10539,6 +10539,13 @@ TreeTransform<Derived>::TransformOMPDefaultClause(OMPDefaultClause *C) { C->getLParenLoc(), C->getEndLoc()); } +template <typename Derived> +OMPClause * +TreeTransform<Derived>::TransformOMPThreadsetClause(OMPThreadsetClause *C) { + // No need to rebuild this clause, no template-dependent parameters. + return C; +} + template <typename Derived> OMPClause * TreeTransform<Derived>::TransformOMPProcBindClause(OMPProcBindClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 8e573a11efd35..957cc12aa773a 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11440,6 +11440,17 @@ void OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKindKwLoc(Record.readSourceLocation()); } +// Read the parameter of fail clause. This will have been saved when +// OMPClauseWriter is called. +void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + C->setLParenLoc(Record.readSourceLocation()); + SourceLocation ThreadsetKindLoc = Record.readSourceLocation(); + C->setThreadsetKindLoc(ThreadsetKindLoc); + OpenMPThreadsetKind TKind = + static_cast<OpenMPThreadsetKind>(Record.readInt()); + C->setThreadsetKind(TKind); +} + void OMPClauseReader::VisitOMPProcBindClause(OMPProcBindClause *C) { C->setProcBindKind(static_cast<llvm::omp::ProcBindKind>(Record.readInt())); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 84f7f2bc5fce4..2818748e38183 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7785,6 +7785,12 @@ void OMPClauseWriter::VisitOMPDefaultClause(OMPDefaultClause *C) { Record.AddSourceLocation(C->getDefaultKindKwLoc()); } +void OMPClauseWriter::VisitOMPThreadsetClause(OMPThreadsetClause *C) { + Record.AddSourceLocation(C->getLParenLoc()); + 
Record.AddSourceLocation(C->getThreadsetKindLoc()); + Record.writeEnum(C->getThreadsetKind()); +} + void OMPClauseWriter::VisitOMPProcBindClause(OMPProcBindClause *C) { Record.push_back(unsigned(C->getProcBindKind())); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 6ea6447d1d590..fc96f86df8108 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2454,6 +2454,8 @@ void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} void OMPClauseEnqueue::VisitOMPFailClause(const OMPFailClause *) {} +void OMPClauseEnqueue::VisitOMPThreadsetClause(const OMPThreadsetClause *) {} + void OMPClauseEnqueue::VisitOMPAbsentClause(const OMPAbsentClause *) {} void OMPClauseEnqueue::VisitOMPHoldsClause(const OMPHoldsClause *) {} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index e2a1449d8cc76..8c73ddc780c76 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -497,6 +497,9 @@ def OMPC_ThreadPrivate : Clause<"threadprivate"> { def OMPC_Threads : Clause<"threads"> { let clangClass = "OMPThreadsClause"; } +def OMPC_Threadset : Clause<"threadset"> { + let clangClass = "OMPThreadsetClause"; +} def OMPC_To : Clause<"to"> { let clangClass = "OMPToClause"; let flangClass = "OmpToClause"; @@ -1152,6 +1155,7 @@ def OMP_Task : Directive<"task"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; >From bcc7c388a929e49ba6805f0038a3b7cdaa475fee Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Tue, 15 Apr 2025 11:13:40 -0500 Subject: [PATCH 2/9] [clang][OpenMP] New OpenMP 6.0 threadset clause --- clang/include/clang/AST/OpenMPClause.h | 12 +-- clang/include/clang/Basic/OpenMPKinds.h | 2 +- clang/lib/AST/OpenMPClause.cpp | 1 + clang/lib/Basic/OpenMPKinds.cpp | 10 ++ 
clang/lib/Serialization/ASTReader.cpp | 2 +- clang/test/OpenMP/task_ast_print.cpp | 12 +++ clang/test/OpenMP/task_threadset_messages.cpp | 99 +++++++++++++++++++ clang/test/OpenMP/taskloop_ast_print.cpp | 16 +++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 9 files changed, 147 insertions(+), 8 deletions(-) create mode 100755 clang/test/OpenMP/task_threadset_messages.cpp diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 81420384f885c..aeaf5c292b1be 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1332,13 +1332,13 @@ class OMPDefaultClause : public OMPClause { } }; -/// This represents 'threadset' clause in the '#pragma omp ...' directive. +/// This represents 'threadset' clause in the '#pragma omp task ...' directive. /// /// \code -/// #pragma omp parallel threadset(shared) +/// #pragma omp task threadset(omp_pool) /// \endcode -/// In this example directive '#pragma omp parallel' has simple 'threadset' -/// clause with kind 'shared'. +/// In this example directive '#pragma omp task' has simple 'threadset' +/// clause with kind 'omp_pool'. class OMPThreadsetClause : public OMPClause { friend class OMPClauseReader; @@ -1362,9 +1362,9 @@ class OMPThreadsetClause : public OMPClause { void setThreadsetKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } public: - /// Build 'threadset' clause with argument \a A ('none' or 'shared'). + /// Build 'threadset' clause with argument \a A ('omp_team' or 'omp_pool'). /// - /// \param A Argument of the clause ('none' or 'shared'). + /// \param A Argument of the clause ('omp_team' or 'omp_pool'). /// \param ALoc Starting location of the argument. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. 
diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index e93e4bdbfb7d7..d3611f2d65989 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -237,7 +237,7 @@ enum OpenMPAllocateClauseModifier { OMPC_ALLOCATE_unknown }; -/// OpenMP modifiers for 'allocate' clause. +/// OpenMP modifiers for 'threadset' clause. enum OpenMPThreadsetKind { #define OPENMP_THREADSET_KIND(Name) OMPC_THREADSET_##Name, #include "clang/Basic/OpenMPKinds.def" diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 85f9c1ab47ae8..24ab245758d93 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -121,6 +121,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_nowait: case OMPC_untied: case OMPC_mergeable: + case OMPC_threadset: case OMPC_threadprivate: case OMPC_flush: case OMPC_depobj: diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index b17a3b14a5ab2..1586a4e1f24c9 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -529,6 +529,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'allocate' clause modifier"); + case OMPC_threadset: + switch (Type) { + case OMPC_THREADSET_unknown: + return "unknown"; +#define OPENMP_THREADSET_KIND(Name) \ + case OMPC_THREADSET_##Name: \ + return #Name; +#include "clang/Basic/OpenMPKinds.def" + } + llvm_unreachable("Invalid OpenMP 'threadset' clause modifier"); case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 957cc12aa773a..b9b464bc1dae2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11440,7 +11440,7 @@ void 
OMPClauseReader::VisitOMPDefaultClause(OMPDefaultClause *C) { C->setDefaultKindKwLoc(Record.readSourceLocation()); } -// Read the parameter of fail clause. This will have been saved when +// Read the parameter of threadset clause. This will have been saved when // OMPClauseWriter is called. void OMPClauseReader::VisitOMPThreadsetClause(OMPThreadsetClause *C) { C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 30fb7ab75cc87..5cfb32b8c1302 100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -Wno-vla -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -Wno-vla -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify -Wno-vla %s -ast-print | FileCheck %s // RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -ast-dump %s | FileCheck %s --check-prefix=DUMP @@ -101,9 +103,11 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); +#ifndef OMP60 #pragma omp taskgroup task_reduction(-: argc) #pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) foo(); +#endif return 0; } @@ -199,6 +203,14 @@ int main(int argc, char **argv) { #pragma omp task depend(inout: 
omp_all_memory) foo(); // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp task threadset(omp_pool) +#pragma omp task threadset(omp_team) + foo(); +#endif + // CHECK60: #pragma omp task threadset(omp_pool) + // CHECK60: #pragma omp task threadset(omp_team) + // CHECK60-NEXT: foo(); return tmain(b, &b) + tmain(x, &x); } diff --git a/clang/test/OpenMP/task_threadset_messages.cpp b/clang/test/OpenMP/task_threadset_messages.cpp new file mode 100755 index 0000000000000..f553a2da17ab9 --- /dev/null +++ b/clang/test/OpenMP/task_threadset_messages.cpp @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +// RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -fopenmp-version=50 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected,omp51 -fopenmp-simd -fopenmp-version=51 -std=c++11 -ferror-limit 200 -o - %s +// RUN: %clang_cc1 -verify=expected -DOMP60 -fopenmp-simd -fopenmp-version=60 -std=c++11 -ferror-limit 200 -o - %s + +#ifdef OMP60 +struct ComplexStruct { + int data[10]; + struct InnerStruct { + float value; + } inner; +}; + +// Template class with member functions using 'threadset'. +template +class TemplateClass { +public: + void foo() { + #pragma omp task threadset(omp_pool) + { + T temp; + } + } + void bar() { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + } +}; + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in task directive. 
+void test_task_threadset_valid() { + int a; + #pragma omp task threadset(omp_pool) + #pragma omp task threadset(omp_team) + #pragma omp task threadset(omp_pool) if(1) + #pragma omp task threadset(omp_team) priority(5) + #pragma omp task threadset(omp_pool) depend(out: a) + #pragma omp parallel + { + #pragma omp task threadset(omp_pool) + { + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 5; ++i) {} + } + } + + TemplateClass obj; + obj.foo(); + obj.bar(); +} + +// Invalid uses of 'threadset' with incorrect arguments in task directive. +void test_task_threadset_invalid_args() { + #pragma omp task threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + #pragma omp task threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + #pragma omp task threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + {} +} + +// Valid uses of 'threadset' with 'omp_pool' and 'omp_team' in taskloop directive. +void test_taskloop_threadset_valid() { + #pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) grainsize(5) + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_team) num_tasks(2) + for (int i = 0; i < 10; ++i) {} +} + +// Invalid uses of 'threadset' with incorrect arguments in taskloop directive. 
+void test_taskloop_threadset_invalid_args() { + #pragma omp taskloop threadset(invalid_arg) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(123) // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool, omp_team) // expected-error {{expected ')'}} expected-note {{to match this '('}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset() // expected-error {{expected 'omp_pool' or 'omp_team' in OpenMP clause 'threadset'}} + for (int i = 0; i < 10; ++i) {} +} + +#else +void test_threadset_not_supported() { + #pragma omp task threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp task threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp task'}} + #pragma omp taskloop threadset(omp_team) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i = 0; i < 10; ++i) {} + #pragma omp taskloop threadset(omp_pool) // omp45-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp50-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} omp51-error {{unexpected OpenMP clause 'threadset' in directive '#pragma omp taskloop'}} + for (int i 
= 0; i < 10; ++i) {} +} +#endif diff --git a/clang/test/OpenMP/taskloop_ast_print.cpp b/clang/test/OpenMP/taskloop_ast_print.cpp index 1b6d7240fa66c..e4bf20af5d78e 100644 --- a/clang/test/OpenMP/taskloop_ast_print.cpp +++ b/clang/test/OpenMP/taskloop_ast_print.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -DOMP60 -ast-print %s | FileCheck %s --check-prefix=CHECK60 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s -ast-print | FileCheck %s // expected-no-diagnostics @@ -87,6 +89,20 @@ int main(int argc, char **argv) { // CHECK-NEXT: #pragma omp cancel taskgroup // CHECK-NEXT: #pragma omp cancellation point taskgroup // CHECK-NEXT: foo(); +#ifdef OMP60 +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { +#pragma omp taskloop threadset(omp_pool) + for (int j = 0; j < 10; ++j) { + foo(); + } +} +#endif + // CHECK60: #pragma omp taskloop threadset(omp_team) + // CHECK60-NEXT: for (int i = 0; i < 10; ++i) { + // CHECK60: #pragma omp taskloop threadset(omp_pool) + // CHECK60-NEXT: for (int j = 0; j < 10; ++j) { + // CHECK60-NEXT: foo(); return (tmain(argc) + tmain(argv[0][0])); } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 8c73ddc780c76..14b086d5504e8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1187,6 +1187,7 @@ def OMP_TaskLoop : Directive<"taskloop"> { VersionedClause, VersionedClause, VersionedClause, 
+ VersionedClause, ]; let allowedExclusiveClauses = [ VersionedClause, >From d700caad78a86e8acbbf92c0e01fe8378cc1d0b3 Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Sun, 20 Apr 2025 12:46:26 -0500 Subject: [PATCH 3/9] [clang] [OpenMP] Codegen support for threadset --- clang/docs/OpenMPSupport.rst | 2 +- clang/docs/ReleaseNotes.rst | 1 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 +++ clang/lib/Serialization/ASTReader.cpp | 3 ++ clang/test/OpenMP/task_ast_print.cpp | 18 ++++----- clang/test/OpenMP/task_codegen.cpp | 33 ++++++++++++++++ clang/test/OpenMP/taskloop_codegen.cpp | 53 ++++++++++++++++++++++++++ 7 files changed, 105 insertions(+), 11 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 83d90ffef6bc7..ee05a65c2aa12 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. +=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :`worked on` | :none:`unclaimed` | | +| threadset clause | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git 
a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5e8df45e71d54..f17bb3bf44a7e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -591,6 +591,7 @@ OpenMP Support - Added support 'no_openmp_constructs' assumption clause. - Added support for 'self_maps' in map and requirement clause. - Added support for 'omp stripe' directive. +- Added support for threadset clause in task and taskloop directives. Improvements ^^^^^^^^^^^^ diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5736864d4cc6b..3d51ed0088014 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3691,6 +3691,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, + PoolFlag = 0x80, }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; @@ -3700,6 +3701,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (NeedsCleanup) Flags = Flags | DestructorsFlag; } + if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) { + OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); + if (Kind == OMPC_THREADSET_omp_pool) + Flags = Flags | PoolFlag; + } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; if (D.hasClausesOfKind<OMPDetachClause>()) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b9b464bc1dae2..62b86b0929133 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11050,6 +11050,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_mergeable: C = new (Context) OMPMergeableClause(); break; + case llvm::omp::OMPC_threadset: + C = new (Context) OMPThreadsetClause(); + break; case llvm::omp::OMPC_read: C = new (Context) OMPReadClause(); break; diff --git a/clang/test/OpenMP/task_ast_print.cpp b/clang/test/OpenMP/task_ast_print.cpp index 5cfb32b8c1302..b059f187156ee 
100644 --- a/clang/test/OpenMP/task_ast_print.cpp +++ b/clang/test/OpenMP/task_ast_print.cpp @@ -103,11 +103,9 @@ T tmain(T argc, T *argv) { a = 2; #pragma omp task default(none), private(argc, b) firstprivate(argv) shared(d) if (argc > 0) final(S::TS > 0) priority(argc) affinity(argc, argv[b:argc], arr[:], ([argc][sizeof(T)])argv) foo(); -#ifndef OMP60 -#pragma omp taskgroup task_reduction(-: argc) -#pragma omp task if (C) mergeable priority(C) in_reduction(-: argc) +#pragma omp taskgroup task_reduction(+: argc) +#pragma omp task if (C) mergeable priority(C) in_reduction(+: argc) foo(); -#endif return 0; } @@ -123,8 +121,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(T)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(C) mergeable priority(C) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> int tmain(int argc, int *argv) { // CHECK-NEXT: int b = argc, c, d, e, f, g; @@ -138,8 +136,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(int)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(5) mergeable priority(5) in_reduction(+: argc) // CHECK-NEXT: foo() // CHECK: template<> long tmain(long argc, long *argv) { // CHECK-NEXT: long b = argc, c, d, 
e, f, g; @@ -153,8 +151,8 @@ T tmain(T argc, T *argv) { // CHECK-NEXT: a = 2; // CHECK-NEXT: #pragma omp task default(none) private(argc,b) firstprivate(argv) shared(d) if(argc > 0) final(S::TS > 0) priority(argc) affinity(argc,argv[b:argc],arr[:],([argc][sizeof(long)])argv) // CHECK-NEXT: foo() -// CHECK-NEXT: #pragma omp taskgroup task_reduction(-: argc) -// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(-: argc) +// CHECK-NEXT: #pragma omp taskgroup task_reduction(+: argc) +// CHECK-NEXT: #pragma omp task if(1) mergeable priority(1) in_reduction(+: argc) // CHECK-NEXT: foo() enum Enum {}; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index c3e6d9e6b1cf7..ba8e6945de9d0 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -41,6 +41,9 @@ // RUN: -emit-llvm -o - -DOMP51 | FileCheck %s \ // RUN: --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -verify -Wno-vla -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify -Wno-vla %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER @@ -65,6 +68,7 @@ struct S { S(const S &s) : a(s.a) {} ~S() {} }; + int a; int main() { char b; @@ -147,6 +151,7 @@ int main() { + // s1 = S(); @@ -215,6 +220,19 @@ void test_omp_all_memory() } } #endif // OMP51 + +#ifdef OMP60 +void test_threadset() +{ +#pragma omp task threadset(omp_team) + { + } +#pragma omp task threadset(omp_pool) + { + } +} +#endif // OMP60 + #endif // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { @@ -10243,3 
+10261,18 @@ void test_omp_all_memory() // CHECK4-51-NEXT: call void @__cxx_global_var_init() // CHECK4-51-NEXT: ret void // +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 +// CHECK6-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num, i32 1, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %0, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 +// CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) +// CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) +// CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index 69f8d3b160bfd..d1197607a2684 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -5,7 +5,12 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | 
FileCheck --check-prefix SIMD-ONLY0 %s + // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -DOMP60 -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -241,4 +246,52 @@ void taskloop_with_class() { } } +#ifdef OMP60 +void test_threadset() +{ +#pragma omp taskloop threadset(omp_team) + for (int i = 0; i < 10; ++i) { + } +#pragma omp taskloop threadset(omp_pool) + for (int i = 0; i < 10; ++i) { + } +} +#endif // OMP60 +// CHECK6-LABEL: define void @_Z14test_threadsetv() +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK6-NEXT: %[[TMP:.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK6-NEXT: %[[TMP2:.*]] = alloca i32, align 4 +// CHECK6-NEXT: %[[TID0:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID1:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID2:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID1:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID3:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID3:.*]], align 8 +// CHECK6-NEXT: %[[TID4:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID4:.*]], align 8 +// CHECK6-NEXT: 
%[[TID5:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: %[[TID6:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID2:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %[[TID6:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID7:.*]] = load i64, ptr %[[TID5:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 +// CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 +// CHECK6-NEXT: %[[TID11:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr %[[TID11:.*]], align 8 +// CHECK6-NEXT: %[[TID12:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr %[[TID12:.*]], align 8 +// CHECK6-NEXT: %[[TID13:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TID13:.*]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: %[[TID14:.*]] = load i64, ptr [[TID12:.*]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID8:.*]], i32 1, ptr %[[TID10:.*]], ptr %[[TID11:.*]], i64 %[[TID14:.*]], i32 1, 
i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 %[[TID0:.*]]) +// CHECK6-NEXT: ret void + #endif >From 11deb35b539cb2a01f271d91b616252a35951dda Mon Sep 17 00:00:00 2001 From: Ritanya B Bharadwaj Date: Thu, 22 May 2025 09:29:08 -0500 Subject: [PATCH 4/9] Adding basic runtime support --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 ++-- clang/test/OpenMP/task_codegen.cpp | 2 +- clang/test/OpenMP/taskloop_codegen.cpp | 2 +- openmp/runtime/src/kmp.h | 6 ++++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 3d51ed0088014..99603f781a19f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3691,7 +3691,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, - PoolFlag = 0x80, + FreeAgentFlag = 0x100, }; unsigned Flags = Data.Tied ? 
TiedFlag : 0; bool NeedsCleanup = false; @@ -3704,7 +3704,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (const auto *Clause = D.getSingleClause()) { OpenMPThreadsetKind Kind = Clause->getThreadsetKind(); if (Kind == OMPC_THREADSET_omp_pool) - Flags = Flags | PoolFlag; + Flags = Flags | FreeAgentFlag; } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index ba8e6945de9d0..0edf1fcac5b4c 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -10271,7 +10271,7 @@ void test_threadset() // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR:[0-9]+]]) // CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num1, ptr %0) // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) -// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 129, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) +// CHECK6-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %omp_global_thread_num3, i32 257, i64 40, i64 1, ptr @.omp_task_entry..[[ENTRY2:[0-9]+]]) // CHECK6-NEXT: getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %3, i32 0, i32 0 // CHECK6-NEXT: call i32 @__kmpc_global_thread_num(ptr @[[GLOB_PTR2:[0-9]+]]) // CHECK6-NEXT: call i32 @__kmpc_omp_task(ptr @1, i32 %omp_global_thread_num4, ptr %3) diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index d1197607a2684..b06e4bc9d79f6 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -279,7 +279,7 @@ void test_threadset() // CHECK6-NEXT: call void @__kmpc_taskloop(ptr @1, i32 %[[TID0:.*]], ptr %[[TID1:.*]], i32 1, ptr %[[TID3:.*]], ptr %4, i64 %[[TID7:.*]], i32 1, i32 0, i64 0, ptr null) // CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @1, i32 
%[[TID0:.*]]) // CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @1, i32 %[[TID0:.*]]) -// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 129, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) +// CHECK6-NEXT: %[[TID8:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[TID0:.*]], i32 257, i64 80, i64 1, ptr @.omp_task_entry..[[ENTRY1:[0-9]+]]) // CHECK6-NEXT: %[[TID9:.*]] = getelementptr inbounds nuw %struct.kmp_task_t_with_privates{{.*}}, ptr %[[TID8:.*]], i32 0, i32 0 // CHECK6-NEXT: %[[TID10:.*]] = getelementptr inbounds nuw %struct.kmp_task_t{{.*}}, ptr %[[TID9:.*]], i32 0, i32 5 // CHECK6-NEXT: store i64 0, ptr %[[TID10:.*]], align 8 diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index d5d667c32c643..1d93b5d169fd6 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2740,7 +2740,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned tasking_ser : 1; unsigned task_serial : 1; unsigned tasktype : 1; - unsigned reserved : 8; + unsigned reserved : 7; + unsigned free_agent_eligible : 1; unsigned hidden_helper : 1; unsigned detachable : 1; unsigned priority_specified : 1; @@ -2763,7 +2764,8 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ setting for the task */ unsigned detachable : 1; /* 1 == can detach */ unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned reserved : 8; /* reserved for compiler use */ + unsigned free_agent_eligible : 1; /* set if task can be executed by a free-agent thread */ + unsigned reserved : 7; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ >From 57fd6ad403f70132df013f1b0ffc711205a7f8ef Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Mon, 16 Jun 2025 23:49:54 +0530 Subject: [PATCH 5/9] Removing runtime changes --- openmp/runtime/src/kmp.h | 6 
++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 1d93b5d169fd6..d5d667c32c643 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -2740,8 +2740,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned tasking_ser : 1; unsigned task_serial : 1; unsigned tasktype : 1; - unsigned reserved : 7; - unsigned free_agent_eligible : 1; + unsigned reserved : 8; unsigned hidden_helper : 1; unsigned detachable : 1; unsigned priority_specified : 1; @@ -2764,8 +2763,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ setting for the task */ unsigned detachable : 1; /* 1 == can detach */ unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned free_agent_eligible : 1; /* set if task can be executed by a free-agent thread */ - unsigned reserved : 7; /* reserved for compiler use */ + unsigned reserved : 8; /* reserved for compiler use */ /* Library flags */ /* Total library flags must be 16 bits */ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ >From bd92f54ca143b4f75a0011fdca0930a7c2919371 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 17 Jun 2025 17:36:29 +0530 Subject: [PATCH 6/9] Update OpenMPSupport.rst --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index c748fa2dcf851..f98e59e15ce85 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. 
+=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/135807 | +| threadset clause | :good:`mostly` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ >From de0c3874384b971ead2933accfdd776e2e63ffd8 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 17 Jun 2025 20:12:39 +0530 Subject: [PATCH 7/9] Update OpenMPSupport.rst --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index f98e59e15ce85..79cf44ed435ef 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -364,7 +364,7 @@ implementation. 
+=============================================================+===========================+===========================+==========================================================================+ | free-agent threads | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| threadset clause | :good:`mostly` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +| threadset clause | :part:`partial` | :none:`unclaimed` | Parse/Sema/Codegen: https://github.com/llvm/llvm-project/pull/135807 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | Recording of task graphs | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ >From 3aefe9718589b0de181917c744c0ab40b98218e5 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Tue, 8 Jul 2025 21:29:24 +0530 Subject: [PATCH 8/9] Update clang/lib/CodeGen/CGOpenMPRuntime.cpp Co-authored-by: Joachim --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a6bf0336680d1..67e771258475b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3700,7 +3700,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, - FreeAgentFlag = 0x100, + FreeAgentFlag = 0x80, }; unsigned Flags = Data.Tied ? 
TiedFlag : 0; bool NeedsCleanup = false; >From 4f899b743b991818c9f016cc989666592c219fd5 Mon Sep 17 00:00:00 2001 From: Ritanya-B-Bharadwaj Date: Mon, 14 Jul 2025 15:23:21 +0530 Subject: [PATCH 9/9] Update OpenMPKinds.cpp --- clang/lib/Basic/OpenMPKinds.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 2264c1e0c4e0d..fe91e225068f0 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -192,6 +192,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, .Default(OMPC_THREADSET_unknown); if (LangOpts.OpenMP < 60) return OMPC_THREADSET_unknown; + } case OMPC_num_threads: { unsigned Type = llvm::StringSwitch(Str) #define OPENMP_NUMTHREADS_MODIFIER(Name) .Case(#Name, OMPC_NUMTHREADS_##Name) From openmp-commits at lists.llvm.org Mon Jul 14 04:36:04 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?B?Um9nZXIgRmVycmVyIEliw6HDsWV6?= via Openmp-commits) Date: Mon, 14 Jul 2025 04:36:04 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6874eba4.170a0220.1c17c3.e98b@mx.google.com> https://github.com/rofirrim updated https://github.com/llvm/llvm-project/pull/139293 >From fb91129401f61b332fc1147e5a81d553abd7658a Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:25:33 +0000 Subject: [PATCH 01/12] Add fuse directive patch --- clang/include/clang-c/Index.h | 4 + clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 99 +- .../clang/Basic/DiagnosticSemaKinds.td | 8 + clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 27 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 25 + clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 4 + 
clang/lib/Basic/OpenMPKinds.cpp | 2 +- clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 600 +++++++ clang/lib/Sema/TreeTransform.h | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 11 + clang/lib/Serialization/ASTWriterStmt.cpp | 6 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + clang/test/OpenMP/fuse_ast_print.cpp | 278 +++ clang/test/OpenMP/fuse_codegen.cpp | 1511 +++++++++++++++++ clang/test/OpenMP/fuse_messages.cpp | 76 + clang/tools/libclang/CIndex.cpp | 7 + clang/tools/libclang/CXCursor.cpp | 3 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 + .../runtime/test/transform/fuse/foreach.cpp | 192 +++ openmp/runtime/test/transform/fuse/intfor.c | 50 + .../runtime/test/transform/fuse/iterfor.cpp | 194 +++ .../fuse/parallel-wsloop-collapse-foreach.cpp | 208 +++ .../fuse/parallel-wsloop-collapse-intfor.c | 45 + 31 files changed, 3387 insertions(+), 2 deletions(-) create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp create mode 100644 clang/test/OpenMP/fuse_codegen.cpp create mode 100644 clang/test/OpenMP/fuse_messages.cpp create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp create mode 100644 openmp/runtime/test/transform/fuse/intfor.c create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index e4cb4327fbaac..148b89ab9cfa4 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2162,6 +2162,10 @@ enum CXCursorKind { */ CXCursor_OMPStripeDirective = 310, + /** OpenMP fuse directive + */ + CXCursor_OMPFuseDirective = 318, + /** OpenACC Compute Construct. 
*/ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 5cb2f57edffe4..918216e8df4aa 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3090,6 +3090,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective, DEF_TRAVERSE_STMT(OMPReverseDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPFuseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPInterchangeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index e2fd2114026f7..cb8bb91f4768c 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Number of loops generated by this loop transformation. unsigned NumGeneratedLoops = 0; + /// Number of top level canonical loop nests generated by this loop + /// transformation + unsigned NumGeneratedLoopNests = 0; protected: explicit OMPLoopTransformationDirective(StmtClass SC, @@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop + /// transformation + void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } public: /// Return the number of associated (consumed) loops. @@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Return the number of loops generated by this loop transformation. 
unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; } + /// Return the number of top level canonical loop nests generated by this loop + /// transformation + unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; } + /// Get the de-sugared statements after the loop transformation. /// /// Might be nullptr if either the directive generates no loops and is handled @@ -995,7 +1005,8 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass; + C == OMPStripeDirectiveClass || + C == OMPFuseDirectiveClass; } }; @@ -5562,6 +5573,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5793,6 +5805,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_reverse, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5865,6 +5878,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5915,6 +5929,89 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp fuse' loop transformation directive +/// +/// \code{c} +/// #pragma omp fuse +/// { +/// for(int i = 0; i < m1; ++i) {...} +/// for(int j = 0; j < m2; ++j) {...} +/// ... 
+/// } +/// \endcode + +class OMPFuseDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + // Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPLoopTransformationDirective(OMPFuseDirectiveClass, + llvm::omp::OMPD_fuse, StartLoc, EndLoc, + NumLoops) { + // This default initialization assumes simple loop fusion. + // If a 'looprange' clause is specified, these values must be explicitly set + setNumGeneratedLoopNests(1); + setNumGeneratedLoops(NumLoops); + } + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for #pragma omp fuse' + /// + /// \param C Context of the AST + /// \param StartLoc Location of the introducer (e.g the 'omp' token) + /// \param EndLoc Location of the directive's end (e.g the tok::eod) + /// \param Clauses The directive's clauses + /// \param NumLoops Number of total affected loops + /// \param NumLoopNests Number of affected top level canonical loops + /// (number of items in the 'looprange' clause if present) + /// \param AssociatedStmt The outermost associated loop + /// \param TransformedStmt The loop nest after fusion, or nullptr in + /// dependent + /// \param PreInits Helper preinits statements for the loop nest + static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, + unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, + Stmt *PreInits); + + /// Build an empty '#pragma omp fuse' AST node for deserialization + /// + /// \param C Context of the AST + /// \param NumClauses Number of clauses to allocate + /// \param NumLoops Number of 
associated loops to allocate + static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, + unsigned NumLoops); + + /// Gets the associated loops after the transformation. This is the de-sugared + /// replacement or nullptr in dependent contexts. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPFuseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 979ff60b73b75..fe9ca29038a1f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,6 +11612,14 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; +def warn_omp_different_loop_ind_var_types : Warning < + "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">; +def err_omp_not_canonical_loop : Error < + "loop after '#pragma omp %0' is not in canonical form">; +def err_omp_not_a_loop_sequence : Error < + "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; +def err_omp_empty_loop_sequence : Error < + "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index
c9c173f5c7469..45d1a813e4b1f 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -233,6 +233,7 @@ def OMPStripeDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; def OMPInterchangeDirective : StmtNode; +def OMPFuseDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 7b169f56b6807..ea21377a8db9c 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase { Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + + /// Called on well-formed '#pragma omp fuse' after parsing of its + /// clauses and the associated statement. + StmtResult ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult @@ -1481,6 +1488,26 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); + /// Analyzes and checks a loop sequence for use by a loop transformation + /// + /// \param Kind The loop transformation directive kind. + /// \param NumLoops [out] Number of total canonical loops + /// \param LoopSeqSize [out] Number of top level canonical loops + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param LoopStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop. 
+ /// \param Context + /// \return Whether there was an absence of errors or not + bool checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + ASTContext &Context); + /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. struct OMPDeclareVariantScope { diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 9d265f27b8e31..83b73554d693c 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1948,6 +1948,7 @@ enum StmtCode { STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, + STMT_OMP_FUSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 2eeb5e45ab511..276e43ec9f7d5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -456,6 +456,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, auto *Dir = createDirective( C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); + // The number of generated loops and loop nests during unroll matches + Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); return Dir; @@ -508,6 +510,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPFuseDirective *OMPFuseDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { + + OMPFuseDirective *Dir 
= createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + Dir->setNumGeneratedLoopNests(NumLoopNests); + Dir->setNumGeneratedLoops(NumLoops); + return Dir; +} + +OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 28317911d825b..4f57c63154da0 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -790,6 +790,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { + Indent() << "#pragma omp fuse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c61450e19f1b6..c5d1d5b48508e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1026,6 +1026,10 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index a451fc7c01841..d172450512f13 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ 
b/clang/lib/Basic/OpenMPKinds.cpp @@ -702,7 +702,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe; + DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 8742f8e0fc04a..aa12d62d1b865 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast(*S)); break; + case Stmt::OMPFuseDirectiveClass: + EmitOMPFuseDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d9195d749e056..cf03d5d3d88a3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -198,6 +198,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } else if (const auto *Interchange = dyn_cast(&S)) { PreInits = Interchange->getPreInits(); + } else if (const auto *Fuse = dyn_cast(&S)) { + PreInits = Fuse->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2922,6 +2924,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective( EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) { + // Emit the de-sugared statement + OMPTransformDirectiveScopeRAII FuseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git 
a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a5ab9df01dba9..fe753e5b688b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3855,6 +3855,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); + void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPScopeDirective(const OMPScopeDirective &S); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 0a6cea8869c14..3eb59156c04af 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 00f4658180807..84ac9587bd54d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4404,6 +4404,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: + case OMPD_fuse: case OMPD_assume: break; default: @@ -6221,6 +6222,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_fuse: + Res = + ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, 
VarsWithInheritedDSA); @@ -14230,6 +14235,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); @@ -14240,6 +14247,265 @@ bool SemaOpenMP::checkTransformableLoopNest( return Result; } +class NestedLoopCounterVisitor + : public clang::RecursiveASTVisitor { +public: + explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {} + + bool VisitForStmt(clang::ForStmt *FS) { + ++NestedLoopCount; + return true; + } + + bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) { + ++NestedLoopCount; + return true; + } + + unsigned getNestedLoopCount() const { return NestedLoopCount; } + +private: + unsigned NestedLoopCount; +}; + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + ASTContext &Context) { + + // Checks whether the given statement is a compound statement + VarsWithInheritedDSAType TmpDSA; + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + // Callback for updating pre-inits in case there are even more + // loop-sequence-generating-constructs inside of the main compound stmt + auto OnTransformationCallback = + [&OriginalInits](OMPLoopBasedDirective *Transform) { + Stmt *DependentPreInits; + if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir 
= dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else + llvm_unreachable("Unhandled loop transformation"); + + appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + }; + + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. handleLoopSequence serves as the recurisve entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure + + auto NLCV = NestedLoopCounterVisitor(); + // Helper functions to validate canonical loop sequence grammar is valid + auto isLoopSequenceDerivation = [](auto *Child) { + return isa(Child) || isa(Child) || + isa(Child); + }; + auto isLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + + // Helper Lambda to handle storing initialization and body statements for both + // ForStmt and CXXForRangeStmt and checks for any possible mismatch between + // induction variables types + QualType BaseInductionVarType; + auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, + this, &Context](Stmt *LoopStmt) { + if (auto *For = dyn_cast(LoopStmt)) { + OriginalInits.back().push_back(For->getInit()); + ForStmts.push_back(For); + // Extract induction variable + if (auto *InitStmt = dyn_cast_or_null(For->getInit())) { + if (auto *InitDecl = dyn_cast(InitStmt->getSingleDecl())) { + QualType InductionVarType = InitDecl->getType().getCanonicalType(); + + // Compare with first loop type + if 
(BaseInductionVarType.isNull()) { + BaseInductionVarType = InductionVarType; + } else if (!Context.hasSameType(BaseInductionVarType, + InductionVarType)) { + Diag(InitDecl->getBeginLoc(), + diag::warn_omp_different_loop_ind_var_types) + << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType + << InductionVarType; + } + } + } + + } else { + assert(isa(LoopStmt) && + "Expected canonical for or range-based for loops."); + auto *CXXFor = dyn_cast(LoopStmt); + OriginalInits.back().push_back(CXXFor->getBeginStmt()); + ForStmts.push_back(CXXFor); + } + }; + // Helper lambda functions to encapsulate the processing of different + // derivations of the canonical loop sequence grammar + // + // Modularized code for handling loop generation and transformations + auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers, + &OriginalInits, &LoopSeqSize, &NumLoops, Kind, + &TmpDSA, &OnTransformationCallback, + this](Stmt *Child) { + auto LoopTransform = dyn_cast(Child); + Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); + unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); + + // Handle the case where transformed statement is not available due to + // dependent contexts + if (!TransformedStmt) { + if (NumGeneratedLoopNests > 0) + return true; + // Unroll full + else { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + // Handle loop transformations with multiple loop nests + // Unroll full + if (NumGeneratedLoopNests <= 0) { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + // Future loop transformations that generate multiple canonical loops + } else if (NumGeneratedLoopNests > 1) { + llvm_unreachable("Multiple canonical loop generating transformations " + "like loop splitting are not yet supported"); + } + + // Process the transformed loop statement + Child = TransformedStmt; + 
OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + OnTransformationCallback(LoopTransform); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(TransformedStmt); + NumLoops += LoopTransform->getNumGeneratedLoops(); + return true; + }; + + // Modularized code for handling regular canonical loops + auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV, + this](Stmt *Child) { + OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(Child); + NumLoops += NLCV.TraverseStmt(Child); + return true; + }; + + // Helper function to process a Loop Sequence Recursively + auto handleLoopSequence = [&](Stmt *LoopSeqStmt, + auto &handleLoopSequenceCallback) -> bool { + for (auto *Child : LoopSeqStmt->children()) { + if (!Child) + continue; + + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required + if (isa(Child)) { + if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { + if (isLoopGeneratingStmt(Child)) { + if 
(!handleLoopGeneration(Child)) { + return false; + } + } else { + if (!handleRegularLoop(Child)) { + return false; + } + } + ++LoopSeqSize; + } else { + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + return true; + }; + + // Recursive entry point to process the main loop sequence + if (!handleLoopSequence(AStmt, handleLoopSequence)) { + return false; + } + + if (LoopSeqSize <= 0) { + Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + return true; +} + /// Add preinit statements that need to be propageted from the selected loop. static void addLoopPreInits(ASTContext &Context, OMPLoopBasedDirective::HelperExprs &LoopHelper, @@ -15499,6 +15765,340 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + DeclContext *CurrContext = SemaRef.CurContext; + Scope *CurScope = SemaRef.getCurScope(); + CaptureVars CopyTransformer(SemaRef); + + // Ensure the structured block is not empty + if (!AStmt) { + return StmtError(); + } + // Validate that the potential loop sequence is transformable for fusion + // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops + SmallVector LoopHelpers; + SmallVector LoopStmts; + SmallVector> OriginalInits; + + unsigned NumLoops; + // TODO: Support looprange clause using LoopSeqSize + unsigned LoopSeqSize; + if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, + LoopHelpers, LoopStmts, OriginalInits, + Context)) { + return StmtError(); + } + + // Defer transformation in dependent contexts + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, 
EndLoc, Clauses, + NumLoops, 1, AStmt, nullptr, nullptr); + } + assert(LoopHelpers.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + assert(OriginalInits.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. + SmallVector PreInits; + + // Select the type with the largest bit width among all induction variables + QualType IVType = LoopHelpers[0].IterationVarRef->getType(); + for (unsigned int I = 1; I < LoopSeqSize; ++I) { + QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); + if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { + IVType = CurrentIVType; + } + } + uint64_t IVBitWidth = Context.getIntWidth(IVType); + + // Create pre-init declarations for all loops lower bounds, upper bounds, + // strides and num-iterations + SmallVector LBVarDecls; + SmallVector STVarDecls; + SmallVector NIVarDecls; + SmallVector UBVarDecls; + SmallVector IVVarDecls; + + // Helper lambda to create variables for bounds, strides, and other + // expressions. Generates both the variable declaration and the corresponding + // initialization statement. 
+ auto CreateHelperVarAndStmt = + [&SemaRef = this->SemaRef, &Context, &CopyTransformer, + &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I, + bool NeedsNewVD = false) { + Expr *TransformedExpr = + AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); + if (!TransformedExpr) + return std::pair(nullptr, StmtError()); + + auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str(); + + VarDecl *VD; + if (NeedsNewVD) { + VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name); + SemaRef.AddInitializerToDecl(VD, TransformedExpr, false); + + } else { + // Create a unique variable name + DeclRefExpr *DRE = cast(TransformedExpr); + VD = cast(DRE->getDecl()); + VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name)); + } + // Create the corresponding declaration statement + StmtResult DeclStmt = new (Context) class DeclStmt( + DeclGroupRef(VD), SourceLocation(), SourceLocation()); + return std::make_pair(VD, DeclStmt); + }; + + // Process each single loop to generate and collect declarations + // and statements for all helper expressions + for (unsigned int I = 0; I < LoopSeqSize; ++I) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I); + auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I); + auto [NIVD, NIDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true); + auto [IVVD, IVDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I); + + if (!LBVD || !STVD || !NIVD || !IVVD) + return StmtError(); + + UBVarDecls.push_back(UBVD); + LBVarDecls.push_back(LBVD); + STVarDecls.push_back(STVD); + NIVarDecls.push_back(NIVD); + IVVarDecls.push_back(IVVD); + + PreInits.push_back(UBDStmt.get()); + PreInits.push_back(LBDStmt.get()); + PreInits.push_back(STDStmt.get()); + 
PreInits.push_back(NIDStmt.get()); + PreInits.push_back(IVDStmt.get()); + } + + auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) { + return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(), + false); + }; + + // Following up the creation of the final fused loop will be performed + // which has the following shape (considering the selected loops): + // + // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) { + // if (fuse.index < ni0){ + // iv0 = lb0 + st0 * fuse.index; + // original.index0 = iv0 + // body(0); + // } + // if (fuse.index < ni1){ + // iv1 = lb1 + st1 * fuse.index; + // original.index1 = iv1 + // body(1); + // } + // + // ... + // + // if (fuse.index < nik){ + // ivk = lbk + stk * fuse.index; + // original.indexk = ivk + // body(k); Expr *InitVal = IntegerLiteral::Create(Context, + // llvm::APInt(IVWidth, 0), + + // } + + // 1. Create the initialized fuse index + const std::string IndexName = Twine(".omp.fuse.index").str(); + Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), + IVType, SourceLocation()); + VarDecl *IndexDecl = + buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr); + SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation()); + + if (!InitStmt.isUsable()) + return StmtError(); + + auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType, + Loc = InitVal->getExprLoc()]() { + return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false); + }; + + // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2, + // ..., NI_k) + // + // This loop accumulates the maximum value across multiple expressions, + // ensuring each step constructs a unique AST node for correctness. 
By using + // intermediate temporary variables and conditional operators, we maintain + // distinct nodes and avoid duplicating subtrees, For instance, max(a,b,c): + // omp.temp0 = max(a, b) + // omp.temp1 = max(omp.temp0, c) + // omp.fuse.max = max(omp.temp1, omp.temp0) + + ExprResult MaxExpr; + for (unsigned I = 0; I < LoopSeqSize; ++I) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]); + QualType NITy = NIRef->getType(); + + if (MaxExpr.isUnset()) { + // Initialize MaxExpr with the first NI expression + MaxExpr = NIRef; + } else { + // Create a new acummulator variable t_i = MaxExpr + std::string TempName = (Twine(".omp.temp.") + Twine(I)).str(); + VarDecl *TempDecl = + buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); + TempDecl->setInit(MaxExpr.get()); + DeclRefExpr *TempRef = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + DeclRefExpr *TempRef2 = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + // Add a DeclStmt to PreInits to ensure the variable is declared. + StmtResult TempStmt = new (Context) + DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation()); + + if (!TempStmt.isUsable()) + return StmtError(); + PreInits.push_back(TempStmt.get()); + + // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef) + ExprResult Comparison = + SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef); + // Handle any errors in Comparison creation + if (!Comparison.isUsable()) + return StmtError(); + + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]); + // Update MaxExpr using a conditional expression to hold the max value + MaxExpr = new (Context) ConditionalOperator( + Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), + NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary); + + if (!MaxExpr.isUsable()) + return StmtError(); + } + } + if (!MaxExpr.isUsable()) + return StmtError(); + + // 3. 
Declare the max variable + const std::string MaxName = Twine(".omp.fuse.max").str(); + VarDecl *MaxDecl = + buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr); + MaxDecl->setInit(MaxExpr.get()); + DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false); + StmtResult MaxStmt = new (Context) + DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation()); + + if (MaxStmt.isInvalid()) + return StmtError(); + PreInits.push_back(MaxStmt.get()); + + // 4. Create condition Expr: index < n_max + ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, + MakeIVRef(), MaxRef); + if (!CondExpr.isUsable()) + return StmtError(); + // 5. Increment Expr: ++index + ExprResult IncrExpr = + SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + // 6. Build the Fused Loop Body + // The final fused loop iterates over the maximum logical range. Inside the + // loop, each original loop's index is calculated dynamically, and its body + // is executed conditionally. 
+ // + // Each sub-loop's body is guarded by a conditional statement to ensure + // it executes only within its logical iteration range: + // + // if (fuse.index < ni_k){ + // iv_k = lb_k + st_k * fuse.index; + // original.index = iv_k + // body(k); + // } + + CompoundStmt *FusedBody = nullptr; + SmallVector FusedBodyStmts; + for (unsigned I = 0; I < LoopSeqSize; ++I) { + + // Assingment of the original sub-loop index to compute the logical index + // IV_k = LB_k + omp.fuse.index * ST_k + + ExprResult IdxExpr = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, + MakeVarDeclRef(STVarDecls[I]), MakeIVRef()); + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, + MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get()); + + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, + MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get()); + if (!IdxExpr.isUsable()) + return StmtError(); + + // Update the original i_k = IV_k + SmallVector BodyStmts; + BodyStmts.push_back(IdxExpr.get()); + llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + + if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) + BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); + + Stmt *Body = (isa(LoopStmts[I])) + ? 
cast(LoopStmts[I])->getBody() + : cast(LoopStmts[I])->getBody(); + + BodyStmts.push_back(Body); + + CompoundStmt *CombinedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + ExprResult Condition = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), + MakeVarDeclRef(NIVarDecls[I])); + + if (!Condition.isUsable()) + return StmtError(); + + IfStmt *IfStatement = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr, + Condition.get(), SourceLocation(), SourceLocation(), CombinedBody, + SourceLocation(), nullptr); + + FusedBodyStmts.push_back(IfStatement); + } + FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + + // 7. Construct the final fused loop + ForStmt *FusedForStmt = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(), + FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), + IncrExpr.get()->getEndLoc()); + + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + 1, AStmt, FusedForStmt, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3e33fb73e01b4..45f556f22c511 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9675,6 +9675,17 @@ StmtResult TreeTransform::TransformOMPInterchangeDirective( return Res; } +template +StmtResult +TreeTransform::TransformOMPFuseDirective(OMPFuseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template 
StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 44cfb83ad2db4..291bd8ea4bf18 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2445,6 +2445,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); D->setNumGeneratedLoops(Record.readUInt32()); + D->setNumGeneratedLoopNests(Record.readUInt32()); } void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2467,6 +2468,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3608,6 +3613,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = OMPReverseDirective::CreateEmpty(Context, NumLoops); break; } + case STMT_OMP_FUSE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } case STMT_OMP_INTERCHANGE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a6e320c7f3eb0..5bf1ecfb968e8 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2449,6 +2449,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); Record.writeUInt32(D->getNumGeneratedLoops()); + Record.writeUInt32(D->getNumGeneratedLoopNests()); } void 
ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2476,6 +2477,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_FUSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index c77ef26da568d..7218d7e62acdd 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp new file mode 100644 index 0000000000000..43ce815dab024 --- /dev/null +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -0,0 +1,278 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu 
-fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2() { + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + // DUMP-NEXT: OMPPartialClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 4 + // DUMP-NEXT: IntegerLiteral {{.*}} 4 + #pragma omp unroll partial(4) + // PRINT: #pragma omp fuse + // DUMP-NEXT: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + +} + +//PRINT-LABEL: void foo3( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3 +template +void foo3() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp 
unroll partial(Factor1) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor1) + // PRINT: for (int i = 0; i < 12; i += 1) + // DUMP: ForStmt + for (int i = 0; i < 12; i += 1) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: #pragma omp unroll partial(Factor2) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor2) + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } +} + +// Also test instantiating the template. +void tfoo3() { + foo3<4,2>(); +} + +//PRINT-LABEL: void foo4( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4 +template +void foo4(int start, int end) { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (T i = start; i < end; i += Step) + // DUMP: ForStmt + for (T i = start; i < end; i += Step) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + + // PRINT: for (T j = end; j > start; j -= Step) + // DUMP: ForStmt + for (T j = end; j > start; j -= Step) { + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + + } +} + +// Also test instantiating the template. 
+void tfoo4() { + foo4(0, 64); +} + + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5() { + double arr[128], arr2[128]; + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT-NEXT: for (auto &&a : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&a: arr) + // PRINT: body(a) + // DUMP: CallExpr + body(a); + // PRINT: for (double v = 42; auto &&b : arr) + // DUMP: CXXForRangeStmt + for (double v = 42; auto &&b: arr) + // PRINT: body(b, v); + // DUMP: CallExpr + body(b, v); + // PRINT: for (auto &&c : arr2) + // DUMP: CXXForRangeStmt + for (auto &&c: arr2) + // PRINT: body(c) + // DUMP: CallExpr + body(c); + + } + +} + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionDecl {{.*}} foo6 +void foo6() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i <= 10; ++i) + // DUMP: ForStmt + for (int i = 0; i <= 10; ++i) + body(i); + // PRINT: for (int j = 0; j < 100; ++j) + // DUMP: ForStmt + for(int j = 0; j < 100; ++j) + body(j); + } + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(4) + // PRINT: for (int k = 0; k < 250; ++k) + // DUMP: ForStmt + for (int k = 0; k < 250; ++k) + body(k); + } +} + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 
0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + } + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + } + } + } + } + +} + + + + + +#endif \ No newline at end of file diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp new file mode 100644 index 0000000000000..6c1e21092da43 --- /dev/null +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -0,0 +1,1511 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +//placeholder for loop body code. +extern "C" void body(...) 
{} + +extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { + int i,j; + #pragma omp fuse + { + for(i = start1; i < end1; i += step1) body(i); + for(j = start2; j < end2; j += step2) body(j); + } + +} + +template +void foo2(T start, T end, T step){ + T i,j,k; + #pragma omp fuse + { + for(i = start; i < end; i += step) body(i); + for(j = end; j > start; j -= step) body(j); + for(k = start+step; k < end+step; k += step) body(k); + } +} + +extern "C" void tfoo2() { + foo2(0, 64, 4); +} + +extern "C" void foo3() { + double arr[256]; + #pragma omp fuse + { + #pragma omp fuse + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP16]], label 
%[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP35]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK1: [[IF_THEN22]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END27]] +// CHECK1: [[IF_END27]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo2( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// 
CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] +// 
CHECK1: [[COND_TRUE30]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32:.*]] +// CHECK1: [[COND_FALSE31]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32]] +// CHECK1: [[COND_END32]]: +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] +// CHECK1-NEXT: [[ADD38:%.*]] = add 
i32 [[TMP49]], [[MUL37]] +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK1: [[IF_THEN40]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP61]]) +// CHECK1-NEXT: br label %[[IF_END45]] +// CHECK1: [[IF_END45]]: +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP70]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr 
[[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 +// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 +// CHECK1-NEXT: 
[[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 +// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr 
[[TMP22]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] +// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 +// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 +// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 +// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 +// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 +// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 +// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] +// CHECK1: [[COND_TRUE44]]: +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: br label %[[COND_END46:.*]] +// CHECK1: [[COND_FALSE45]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: br label %[[COND_END46]] +// CHECK1: [[COND_END46]]: +// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] +// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// 
CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] +// CHECK1: [[COND_TRUE50]]: +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END52:.*]] +// CHECK1: [[COND_FALSE51]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END52]] +// CHECK1: [[COND_END52]]: +// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] +// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 +// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 +// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] +// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] +// 
CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 +// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 +// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 +// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] +// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN64]]: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 +// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] +// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP48]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] +// CHECK1: [[IF_THEN70]]: +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 +// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] +// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END75]] +// CHECK1: [[IF_END75]]: +// CHECK1-NEXT: br label %[[IF_END76]] +// CHECK1: [[IF_END76]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] +// CHECK1: [[IF_THEN78]]: +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] +// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 +// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] +// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) +// CHECK1-NEXT: br label %[[IF_END83]] +// CHECK1: [[IF_END83]]: +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] +// CHECK1: [[IF_THEN85]]: +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] +// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] +// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 +// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] +// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) +// CHECK1-NEXT: br label %[[IF_END90]] +// CHECK1: [[IF_END90]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 
[[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], 
[[TMP28]] +// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP35]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK2: [[IF_THEN22]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END27]] +// CHECK2: [[IF_END27]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr 
[[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 +// 
CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 +// CHECK2-NEXT: 
[[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 +// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 +// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr 
[[TMP22]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] +// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 +// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 +// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 +// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 +// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 +// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 +// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] +// CHECK2: [[COND_TRUE44]]: +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: br label %[[COND_END46:.*]] +// CHECK2: [[COND_FALSE45]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: br label %[[COND_END46]] +// CHECK2: [[COND_END46]]: +// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] +// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// 
CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] +// CHECK2: [[COND_TRUE50]]: +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END52:.*]] +// CHECK2: [[COND_FALSE51]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END52]] +// CHECK2: [[COND_END52]]: +// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] +// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 +// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 +// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] +// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] +// 
CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 +// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 +// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 +// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] +// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN64]]: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 +// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] +// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP48]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] +// CHECK2: [[IF_THEN70]]: +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 +// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] +// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END75]] +// CHECK2: [[IF_END75]]: +// CHECK2-NEXT: br label %[[IF_END76]] +// CHECK2: [[IF_END76]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] +// CHECK2: [[IF_THEN78]]: +// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] +// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 +// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] +// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) +// CHECK2-NEXT: br label %[[IF_END83]] +// CHECK2: [[IF_END83]]: +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] +// CHECK2: [[IF_THEN85]]: +// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] +// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] +// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 +// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] +// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) +// CHECK2-NEXT: br label %[[IF_END90]] +// CHECK2: [[IF_END90]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo2( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK2-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], 
label %[[COND_FALSE31:.*]] +// CHECK2: [[COND_TRUE30]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32:.*]] +// CHECK2: [[COND_FALSE31]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32]] +// CHECK2: [[COND_END32]]: +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] +// 
CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK2: [[IF_THEN40]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]]) +// CHECK2-NEXT: br label %[[IF_END45]] +// CHECK2: [[IF_END45]]: +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +//. 
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +//. +// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +//. diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp new file mode 100644 index 0000000000000..50dedfd2c0dc6 --- /dev/null +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + ; + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + {int bar = 0;} + + // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + int x = 2; + } + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp fuse + } + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}} + #pragma omp fuse foo + { + for (int i = 0; i < 7; ++i) + ; + } + + + // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}} + #pragma omp fuse final(0) + { + for (int i = 0; i < 7; ++i) + ; + } + + //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}} + 
//expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} + #pragma omp fuse + { + for(int i = 0; i < 10; i*=2) { + ; + } + } + + //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} + #pragma omp fuse + {} + + //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + #pragma omp unroll full + for(int i = 0; i < 10; ++i); + + for(int j = 0; j < 10; ++j); + } + + //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}} + //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + for(unsigned int j = 0; j < 10; ++j); + for(long long k = 0; k < 100; ++k); + } +} \ No newline at end of file diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3068621d9c004..3afa59b2f2d6c 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2211,6 +2211,7 @@ class EnqueueVisitor : public ConstStmtVisitor, void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); + void VisitOMPFuseDirective(const OMPFuseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -3369,6 +3370,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const 
OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6323,6 +6328,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPInterchangeDirective: return cxstring::createRef("OMPInterchangeDirective"); + case CXCursor_OMPFuseDirective: + return cxstring::createRef("OMPFuseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index a6301daa672c3..a6d032fa302b1 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -687,6 +687,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPInterchangeDirectiveClass: K = CXCursor_OMPInterchangeDirective; break; + case Stmt::OMPFuseDirectiveClass: + K = CXCursor_OMPFuseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index a87111cb5a11d..6352be8069e9e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -855,6 +855,10 @@ def OMP_For : Directive<[Spelling<"for">]> { let category = CA_Executable; let languages = [L_C]; } +def OMP_Fuse : Directive<[Spelling<"fuse">]> { + let association = AS_Loop; + let category = CA_Executable; +} def OMP_Interchange : Directive<[Spelling<"interchange">]> { let allowedOnceClauses = [ VersionedClause, diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp new file mode 100644 index 0000000000000..cabf4bf8a511d --- /dev/null +++ b/openmp/runtime/test/transform/fuse/foreach.cpp @@ -0,0 +1,192 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + 
const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + 
owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (Reporter a{"C"}; auto &&v : Reporter("A")) + printf("v=%d\n", v); + for (Reporter aa{"D"}; auto &&vv : Reporter("B")) + printf("vv=%d\n", vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [A] iterator dtor +// 
CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done + + +#endif diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c new file mode 100644 index 0000000000000..b8171b4df7042 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/intfor.c @@ -0,0 +1,50 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include <stdlib.h> +#include <stdio.h> + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (int i = 5; i <= 25; i += 5) + printf("i=%d\n", i); + for (int j = 10; j < 100; j += 10) + printf("j=%d\n", j); + for (int k = 10; k > 0; --k) + printf("k=%d\n", k); + } + printf("done\n"); + return EXIT_SUCCESS; +} +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=5 +// CHECK-NEXT: j=10 +// CHECK-NEXT: k=10 +// CHECK-NEXT: i=10 +// CHECK-NEXT: j=20 +// CHECK-NEXT: k=9 +// CHECK-NEXT: i=15 +// CHECK-NEXT: j=30 +// CHECK-NEXT: k=8 +// CHECK-NEXT: i=20 +// CHECK-NEXT: j=40 +// CHECK-NEXT: k=7 +// CHECK-NEXT: i=25 +// CHECK-NEXT: j=50 +// CHECK-NEXT: k=6 +// CHECK-NEXT: j=60 +// CHECK-NEXT: k=5 +// CHECK-NEXT: j=70 +// CHECK-NEXT: k=4 +// CHECK-NEXT: j=80 +// CHECK-NEXT: k=3 +// CHECK-NEXT: j=90 +// CHECK-NEXT: k=2 +// CHECK-NEXT: k=1 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp new file mode 100644 index 0000000000000..552484b2981c4 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/iterfor.cpp @@ -0,0 +1,194 @@ +// RUN: %libomp-cxx20-compile-and-run | 
FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + 
owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter C("C"); + Reporter D("D"); +#pragma omp fuse + { + for (auto it = C.begin(); it != C.end(); ++it) + printf("v=%d\n", *it); + + for (auto it = D.begin(); it != D.end(); ++it) + printf("vv=%d\n", *it); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: [C] ctor +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] end() +// CHECK-NEXT: [C] iterator distance: 3 +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] end() +// CHECK-NEXT: [D] iterator distance: 3 +// CHECK-NEXT: [C] iterator advance: 0 += 0 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 0 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 1 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 1 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator 
deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 2 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 2 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [C] dtor diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..e9f76713fe3e0 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,208 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + 
Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + printf("i=%d v=%d\n", i, v); + for (int vv = 0; vv < 3; ++vv) + printf("i=%d vv=%d\n", i, vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 
+// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done + diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c new file mode 100644 index 0000000000000..272908e72c429 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c @@ -0,0 +1,45 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (int j = 0; j < 3; ++j) + printf("i=%d j=%d\n", i, j); + for (int k = 0; k < 3; ++k) + printf("i=%d k=%d\n", i, k); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: i=0 j=0 +// CHECK-NEXT: i=0 k=0 +// CHECK-NEXT: i=0 j=1 +// CHECK-NEXT: i=0 k=1 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: i=0 k=2 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: i=1 k=0 +// CHECK-NEXT: i=1 j=1 +// CHECK-NEXT: i=1 k=1 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: 
i=1 k=2 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: i=2 k=0 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: i=2 k=1 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: i=2 k=2 +// CHECK-NEXT: done >From 34ac92ada84eeca9573d0b005f24d73738f46626 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:28:04 +0000 Subject: [PATCH 02/12] Add looprange clause --- clang/include/clang/AST/OpenMPClause.h | 100 ++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 8 + clang/include/clang/AST/StmtOpenMP.h | 9 +- .../clang/Basic/DiagnosticSemaKinds.td | 5 + clang/include/clang/Parse/Parser.h | 3 + clang/include/clang/Sema/SemaOpenMP.h | 6 + clang/lib/AST/OpenMPClause.cpp | 35 ++ clang/lib/AST/StmtOpenMP.cpp | 7 +- clang/lib/AST/StmtProfile.cpp | 7 + clang/lib/Basic/OpenMPKinds.cpp | 2 + clang/lib/Parse/ParseOpenMP.cpp | 36 ++ clang/lib/Sema/SemaOpenMP.cpp | 155 +++++++-- clang/lib/Sema/TreeTransform.h | 33 ++ clang/lib/Serialization/ASTReader.cpp | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 4 +- clang/lib/Serialization/ASTWriter.cpp | 8 + clang/test/OpenMP/fuse_ast_print.cpp | 67 ++++ clang/test/OpenMP/fuse_codegen.cpp | 320 +++++++++++++++++- clang/test/OpenMP/fuse_messages.cpp | 112 +++++- clang/tools/libclang/CIndex.cpp | 5 + llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 16 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 + 22 files changed, 919 insertions(+), 36 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 6fd16bc0f03be..8f937cdef9cd0 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1143,6 +1143,106 @@ class OMPFullClause final : public OMPNoChildClause { static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) 
+/// \endcode +class OMPLoopRangeClause final : public OMPClause { + friend class OMPClauseReader; + + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of 'first' + SourceLocation FirstLoc; + + /// Location of 'count' + SourceLocation CountLoc; + + /// Expr associated with 'first' argument + Expr *First = nullptr; + + /// Expr associated with 'count' argument + Expr *Count = nullptr; + + /// Set 'first' + void setFirst(Expr *First) { this->First = First; } + + /// Set 'count' + void setCount(Expr *Count) { this->Count = Count; } + + /// Set location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Set location of 'first' argument + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + + /// Set location of 'count' argument + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + +public: + /// Build an AST node for a 'looprange' clause + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param FirstLoc,CountLoc Locations of the 'first' and 'count' arguments. + /// \param EndLoc Ending location of the clause. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, Expr *First, Expr *Count); + + /// Build an empty 'looprange' node for deserialization + /// + /// \param C Context of the AST.
+ static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); + + /// Returns the location of '(' + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the location of 'first' + SourceLocation getFirstLoc() const { return FirstLoc; } + + /// Returns the location of 'count' + SourceLocation getCountLoc() const { return CountLoc; } + + /// Returns the argument 'first' or nullptr if not set + Expr *getFirst() const { return cast_or_null<Expr>(First); } + + /// Returns the argument 'count' or nullptr if not set + Expr *getCount() const { return cast_or_null<Expr>(Count); } + + child_range children() { + return child_range(reinterpret_cast<Stmt **>(&First), + reinterpret_cast<Stmt **>(&Count) + 1); + } + + const_child_range children() const { + auto Children = const_cast<OMPLoopRangeClause *>(this)->children(); + return const_child_range(Children.begin(), Children.end()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_looprange; + } +}; + /// Representation of the 'partial' clause of the '#pragma omp unroll' /// directive.
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 918216e8df4aa..10e44e69dd5da 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3410,6 +3410,14 @@ bool RecursiveASTVisitor::VisitOMPFullClause(OMPFullClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPLoopRangeClause( + OMPLoopRangeClause *C) { + TRY_TO(TraverseStmt(C->getFirst())); + TRY_TO(TraverseStmt(C->getCount())); + return true; +} + template bool RecursiveASTVisitor::VisitOMPPartialClause(OMPPartialClause *C) { TRY_TO(TraverseStmt(C->getFactor())); diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index cb8bb91f4768c..f5115afd0753e 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5572,7 +5572,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { + // Tiling doubles the original number of loops setNumGeneratedLoops(2 * NumLoops); + // Produces a single top-level canonical loop nest setNumGeneratedLoopNests(1); } @@ -5804,6 +5806,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, EndLoc, NumLoops) { + // Reverse produces a single top-level canonical loop nest setNumGeneratedLoops(NumLoops); setNumGeneratedLoopNests(1); } @@ -5877,6 +5880,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { + // Interchange produces a single top-level canonical loop + // nest, with the exact same amount of total loops setNumGeneratedLoops(NumLoops); setNumGeneratedLoopNests(1); } @@ -5995,8 
+6000,10 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective { /// \param C Context of the AST /// \param NumClauses Number of clauses to allocate /// \param NumLoops Number of associated loops to allocate + /// \param NumLoopNests Number of top level loops to allocate static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, - unsigned NumLoops); + unsigned NumLoops, + unsigned NumLoopNests); /// Gets the associated loops after the transformation. This is the de-sugared /// replacement or nulltpr in dependent contexts. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fe9ca29038a1f..002aa7a774fbe 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11620,6 +11620,11 @@ def err_omp_not_a_loop_sequence : Error < "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; def err_omp_empty_loop_sequence : Error < "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; +def err_omp_invalid_looprange : Error < + "loop range in '#pragma omp %0' exceeds the number of available loops: " + "range end '%1' is greater than the total number of loops '%2'">; +def warn_omp_redundant_fusion : Warning < + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a47e23ffbd357..08bee0078b5ff 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6735,6 +6735,9 @@ class Parser : public CodeCompletionHandler { OpenMPClauseKind Kind, bool ParseOnly); + /// Parses the 
'looprange' clause of a '#pragma omp fuse' directive. + OMPClause *ParseOpenMPLoopRangeClause(); + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index ea21377a8db9c..0c28aaf6ab21a 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -922,6 +922,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on well-formed 'looprange' clause after parsing its arguments. + OMPClause * + ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc); /// Called on well-formed 'ordered' clause. OMPClause * ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 0e5052b944162..0b5808eb100e4 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { return new (C) OMPPartialClause(); } +OMPLoopRangeClause * +OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + Expr *First, Expr *Count) { + OMPLoopRangeClause *Clause = CreateEmpty(C); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setLocEnd(EndLoc); + Clause->setFirstLoc(FirstLoc); + Clause->setCountLoc(CountLoc); + Clause->setFirst(First); + Clause->setCount(Count); + return Clause; +} + +OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPLoopRangeClause(); +} + OMPAllocateClause *OMPAllocateClause::Create( const ASTContext &C, SourceLocation
StartLoc, SourceLocation LParenLoc, Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc, @@ -1888,6 +1908,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) { } } +void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) { + OS << "looprange"; + + Expr *First = Node->getFirst(); + Expr *Count = Node->getCount(); + + if (First && Count) { + OS << "("; + First->printPretty(OS, nullptr, Policy, 0); + OS << ","; + Count->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 276e43ec9f7d5..c5a6732cc2217 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -527,10 +527,13 @@ OMPFuseDirective *OMPFuseDirective::Create( OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, - unsigned NumLoops) { - return createEmptyDirective( + unsigned NumLoops, + unsigned NumLoopNests) { + OMPFuseDirective *Dir = createEmptyDirective( C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops); + Dir->setNumGeneratedLoopNests(NumLoopNests); + return Dir; } OMPForSimdDirective * diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c5d1d5b48508e..34ed3f22f6eb7 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) { Profiler->VisitExpr(Factor); } +void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + if (const Expr *First = C->getFirst()) + Profiler->VisitExpr(First); + if (const Expr *Count = C->getCount()) + Profiler->VisitExpr(Count); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if 
(C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index d172450512f13..18330181f1509 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 78d3503d8eb68..2d6d624c1ecc8 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3057,6 +3057,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPLoopRangeClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SourceLocation FirstLoc, CountLoc; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + FirstLoc = Tok.getLocation(); + ExprResult FirstVal = ParseConstantExpression(); + if (!FirstVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ExpectAndConsume(tok::comma); + + CountLoc = Tok.getLocation(); + ExprResult CountVal = ParseConstantExpression(); + if (!CountVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + T.consumeClose(); + + return Actions.OpenMP().ActOnOpenMPLoopRangeClause( + FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(), + FirstLoc, CountLoc, T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPPermutationClause() { SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; SmallVector ArgExprs; @@ -3485,6 +3518,9 @@ OMPClause 
*Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_looprange: + Clause = ParseOpenMPLoopRangeClause(); + break; default: break; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 84ac9587bd54d..3ec3f2ad31e78 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14326,7 +14326,6 @@ bool SemaOpenMP::checkTransformableLoopSequence( // and tries to match the input AST to the canonical loop sequence grammar // structure - auto NLCV = NestedLoopCounterVisitor(); // Helper functions to validate canonical loop sequence grammar is valid auto isLoopSequenceDerivation = [](auto *Child) { return isa(Child) || isa(Child) || @@ -14429,7 +14428,7 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Modularized code for handling regular canonical loops auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, this](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); @@ -14442,8 +14441,11 @@ bool SemaOpenMP::checkTransformableLoopSequence( << getOpenMPDirectiveName(Kind); return false; } + storeLoopStatements(Child); - NumLoops += NLCV.TraverseStmt(Child); + auto NLCV = NestedLoopCounterVisitor(); + NLCV.TraverseStmt(Child); + NumLoops += NLCV.getNestedLoopCount(); return true; }; @@ -15769,6 +15771,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); DeclContext *CurrContext = SemaRef.CurContext; Scope *CurScope = SemaRef.getCurScope(); @@ -15785,7 +15788,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SmallVector> OriginalInits; unsigned NumLoops; - // TODO: Support looprange clause using LoopSeqSize unsigned LoopSeqSize; if (!checkTransformableLoopSequence(OMPD_fuse, 
AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, @@ -15794,10 +15796,67 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, } // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder (0) + // because a dependent context could prevent determining its true value if (CurrContext->isDependentContext()) { return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, - NumLoops, 1, AStmt, nullptr, nullptr); + NumLoops, 0, AStmt, nullptr, nullptr); } + + // Handle clauses, which can be any of the following: [looprange, apply] + const OMPLoopRangeClause *LRC = + OMPExecutableDirective::getSingleClause(Clauses); + + // The clause arguments are invalidated if any error arises + // such as non-constant or non-positive arguments + if (LRC && (!LRC->getFirst() || !LRC->getCount())) + return StmtError(); + + // Delayed semantic check of LoopRange constraint + // Evaluates the loop range arguments and returns the first and count values + auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count, + uint64_t &FirstVal, + uint64_t &CountVal) { + llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context); + llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context); + FirstVal = FirstInt.getZExtValue(); + CountVal = CountInt.getZExtValue(); + }; + + // Checks if the loop range is valid + auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, + unsigned NumLoops) -> bool { + return FirstVal + CountVal - 1 <= NumLoops; + }; + uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + + if (LRC) { + EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, + CountVal); + if (CountVal == 1) + SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) { + SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) + << getOpenMPDirectiveName(OMPD_fuse) 
<< (FirstVal + CountVal - 1) + << LoopSeqSize; + return StmtError(); + } + + LastVal = FirstVal + CountVal - 1; + } + + // Complete fusion generates a single canonical loop nest + // However looprange clause generates several loop nests + unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1; + + // Emit a warning for redundant loop fusion when the sequence contains only + // one loop. + if (LoopSeqSize == 1) + SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + assert(LoopHelpers.size() == LoopSeqSize && "Expecting loop iteration space dimensionality to match number of " "affected loops"); @@ -15811,8 +15870,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SmallVector PreInits; // Select the type with the largest bit width among all induction variables - QualType IVType = LoopHelpers[0].IterationVarRef->getType(); - for (unsigned int I = 1; I < LoopSeqSize; ++I) { + QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); + for (unsigned int I = FirstVal; I < LastVal; ++I) { QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { IVType = CurrentIVType; @@ -15861,20 +15920,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Process each single loop to generate and collect declarations // and statements for all helper expressions - for (unsigned int I = 0; I < LoopSeqSize; ++I) { + for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], PreInits); - auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I); - auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I); - auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I); + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); + auto [LBVD, LBDStmt] = 
CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); auto [NIVD, NIDStmt] = - CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true); + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true); auto [IVVD, IVDStmt] = - CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I); + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); if (!LBVD || !STVD || !NIVD || !IVVD) - return StmtError(); + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); UBVarDecls.push_back(UBVD); LBVarDecls.push_back(LBVD); @@ -15949,8 +16009,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // omp.fuse.max = max(omp.temp1, omp.temp0) ExprResult MaxExpr; - for (unsigned I = 0; I < LoopSeqSize; ++I) { - DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]); + // I is the true index in the loop sequence, J the index in the fused range + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]); QualType NITy = NIRef->getType(); if (MaxExpr.isUnset()) { @@ -15958,7 +16019,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, MaxExpr = NIRef; } else { // Create a new acummulator variable t_i = MaxExpr - std::string TempName = (Twine(".omp.temp.") + Twine(I)).str(); + std::string TempName = (Twine(".omp.temp.") + Twine(J)).str(); VarDecl *TempDecl = buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); TempDecl->setInit(MaxExpr.get()); @@ -15981,7 +16042,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!Comparison.isUsable()) return StmtError(); - DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]); + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]); // Update MaxExpr using a conditional expression to hold the max value MaxExpr = new (Context) ConditionalOperator( Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), @@ -16034,23 +16095,21 @@ StmtResult
SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CompoundStmt *FusedBody = nullptr; SmallVector FusedBodyStmts; - for (unsigned I = 0; I < LoopSeqSize; ++I) { - + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { // Assingment of the original sub-loop index to compute the logical index // IV_k = LB_k + omp.fuse.index * ST_k - ExprResult IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, - MakeVarDeclRef(STVarDecls[I]), MakeIVRef()); + MakeVarDeclRef(STVarDecls[J]), MakeIVRef()); if (!IdxExpr.isUsable()) return StmtError(); IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, - MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get()); + MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get()); if (!IdxExpr.isUsable()) return StmtError(); IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, - MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get()); + MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get()); if (!IdxExpr.isUsable()) return StmtError(); @@ -16065,7 +16124,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, Stmt *Body = (isa(LoopStmts[I])) ? cast(LoopStmts[I])->getBody() : cast(LoopStmts[I])->getBody(); - BodyStmts.push_back(Body); CompoundStmt *CombinedBody = @@ -16073,7 +16131,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SourceLocation(), SourceLocation()); ExprResult Condition = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), - MakeVarDeclRef(NIVarDecls[I])); + MakeVarDeclRef(NIVarDecls[J])); if (!Condition.isUsable()) return StmtError(); @@ -16094,8 +16152,26 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), IncrExpr.get()->getEndLoc()); + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. 
+ Stmt *FusionStmt = FusedForStmt; + if (LRC) { + SmallVector FinalLoops; + // Gather all the pre-fusion loops + for (unsigned I = 0; I < FirstVal - 1; ++I) + FinalLoops.push_back(LoopStmts[I]); + // Gather the fused loop + FinalLoops.push_back(FusedForStmt); + // Gather all the post-fusion loops + for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I) + FinalLoops.push_back(LoopStmts[I]); + FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + } return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, - 1, AStmt, FusedForStmt, + NumLoopNests, AStmt, FusionStmt, buildPreInits(Context, PreInits)); } @@ -17218,6 +17294,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr); } +OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( + Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) { + + // OpenMP [6.0, Restrictions] + // First and Count must be integer expressions with positive value + ExprResult FirstVal = + VerifyPositiveIntegerConstantInClause(First, OMPC_looprange); + if (FirstVal.isInvalid()) + First = nullptr; + + ExprResult CountVal = + VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange); + if (CountVal.isInvalid()) + Count = nullptr; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. 
+ // This check must be performed afterwards due to the delayed + // parsing and computation of the associated loop sequence + return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, + FirstLoc, CountLoc, EndLoc, First, Count); +} + OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 45f556f22c511..30204faf59b7b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1775,6 +1775,14 @@ class TreeTransform { LParenLoc, EndLoc); } + OMPClause * + RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPLoopRangeClause( + First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. 
@@ -10578,6 +10586,31 @@ TreeTransform::TransformOMPPartialClause(OMPPartialClause *C) { C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) { + ExprResult F = getDerived().TransformExpr(C->getFirst()); + if (F.isInvalid()) + return nullptr; + + ExprResult Cn = getDerived().TransformExpr(C->getCount()); + if (Cn.isInvalid()) + return nullptr; + + Expr *First = F.get(); + Expr *Count = Cn.get(); + + bool Changed = (First != C->getFirst()) || (Count != C->getCount()); + + // If no changes and AlwaysRebuild() is false, return the original clause + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + + return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(), + C->getLParenLoc(), C->getFirstLoc(), + C->getCountLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a3fbc3d25acab..d5e7c287c23a4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11089,6 +11089,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_partial: C = OMPPartialClause::CreateEmpty(Context); break; + case llvm::omp::OMPC_looprange: + C = OMPLoopRangeClause::CreateEmpty(Context); + break; case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -11490,6 +11493,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + C->setFirst(Record.readSubExpr()); + C->setCount(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); + C->setFirstLoc(Record.readSourceLocation()); + C->setCountLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); 
C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 291bd8ea4bf18..b424b5aa7b0c6 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -3616,7 +3616,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case STMT_OMP_FUSE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; - S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops); + unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops, + NumLoopNests); break; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index af7229d748872..c99ffab64c6e6 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7807,6 +7807,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + Record.AddStmt(C->getFirst()); + Record.AddStmt(C->getCount()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getFirstLoc()); + Record.AddSourceLocation(C->getCountLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp index 43ce815dab024..ac4f0d38a9c68 100644 --- a/clang/test/OpenMP/fuse_ast_print.cpp +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -271,6 +271,73 @@ void foo7() { } +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse 
looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +//PRINT-LABEL: void foo9( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9 +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C +template +void foo9() { + // PRINT: #pragma omp fuse looprange(F,C) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(F,C) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + + } +} + +// Also test instantiating the template. 
+void tfoo9() { + foo9<1, 2>(); +} + diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp index 6c1e21092da43..d9500bed3ce31 100644 --- a/clang/test/OpenMP/fuse_codegen.cpp +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -53,6 +53,18 @@ extern "C" void foo3() { } } +extern "C" void foo4() { + double arr[256]; + + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 64; ++k) body(k); + for(int c = 42; auto &&v: arr) body(c,v); + } +} + #endif // CHECK1-LABEL: define dso_local void @body( @@ -777,6 +789,157 @@ extern "C" void foo3() { // CHECK1-NEXT: ret void // // +// CHECK1-LABEL: define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2:.*]] +// CHECK1: [[FOR_COND2]]: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK1: [[FOR_BODY4]]: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK1: [[IF_THEN9]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK1-NEXT: br label %[[IF_END14]] +// CHECK1: [[IF_END14]]: +// CHECK1-NEXT: br label %[[FOR_INC15:.*]] +// CHECK1: [[FOR_INC15]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: [[FOR_END17]]: +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19:.*]] +// CHECK1: [[FOR_COND19]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK1: [[FOR_BODY21]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK1-NEXT: br label %[[FOR_INC22:.*]] +// CHECK1: [[FOR_INC22]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19]] +// CHECK1: [[FOR_END23]]: +// CHECK1-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @body( // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1259,6 +1422,157 @@ extern "C" void foo3() { // CHECK2-NEXT: ret void // // +// CHECK2-LABEL: define dso_local void @foo4( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca 
ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2:.*]] +// CHECK2: [[FOR_COND2]]: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK2: [[FOR_BODY4]]: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK2: [[IF_THEN9]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK2-NEXT: br label %[[IF_END14]] +// CHECK2: [[IF_END14]]: +// CHECK2-NEXT: br label %[[FOR_INC15:.*]] +// CHECK2: [[FOR_INC15]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: [[FOR_END17]]: +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19:.*]] +// CHECK2: [[FOR_COND19]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK2: [[FOR_BODY21]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK2-NEXT: br label %[[FOR_INC22:.*]] +// CHECK2: [[FOR_INC22]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19]] +// CHECK2: [[FOR_END23]]: +// CHECK2-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @tfoo2( // CHECK2-SAME: ) #[[ATTR0]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1494,7 +1808,7 @@ extern "C" void foo3() { // CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void // @@ -1503,9 +1817,13 @@ extern "C" void foo3() { // CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} // CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} //. // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} // CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} //. 
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 50dedfd2c0dc6..2a2491d008a0b 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -33,6 +33,8 @@ void func() { { for (int i = 0; i < 7; ++i) ; + for(int j = 0; j < 100; ++j); + } @@ -41,6 +43,8 @@ void func() { { for (int i = 0; i < 7; ++i) ; + for(int j = 0; j < 100; ++j); + } //expected-error at +4 {{loop after '#pragma omp fuse' is not in canonical form}} @@ -50,6 +54,7 @@ void func() { for(int i = 0; i < 10; i*=2) { ; } + for(int j = 0; j < 100; ++j); } //expected-error at +2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} @@ -73,4 +78,109 @@ void func() { for(unsigned int j = 0; j < 10; ++j); for(long long k = 0; k < 100; ++k); } -} \ No newline at end of file + + //expected-warning at +2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + } + + //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(1, 1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, -1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, 0) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + const int x = 1; + constexpr int y = 4; + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(x,y) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j 
< 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(1,420) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +// In a template context, but expression itself not instantiation-dependent +template +static void templated_func() { + + //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(2,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(3,3) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + +} + +template +static void templated_func_value_dependent() { + + //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(V,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +template +static void templated_func_type_dependent() { + constexpr T s = 1; + + //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(s,s-1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + + +void template_inst() { + // expected-note at +1 {{in instantiation of function template specialization 'templated_func' requested here}} + templated_func(); + // expected-note at +1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}} + 
templated_func_value_dependent<1>(); + // expected-note at +1 {{in instantiation of function template specialization 'templated_func_type_dependent' requested here}} + templated_func_type_dependent(); + +} + + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3afa59b2f2d6c..deb6fdd68e6d3 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2417,6 +2417,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) { Visitor->AddStmt(C->getFactor()); } +void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + Visitor->AddStmt(C->getFirst()); + Visitor->AddStmt(C->getCount()); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index de888ff86fe91..a2f258bbf745b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1233,6 +1233,15 @@ struct WriteT { using EmptyTrait = std::true_type; }; +// V6: [6.4.7] Looprange clause +template struct LoopRangeT { + using Begin = E; + using End = E; + + using TupleTrait = std::true_type; + std::tuple t; +}; + // --- template @@ -1263,9 +1272,10 @@ using TupleClausesT = DefaultmapT, DeviceT, DistScheduleT, DoacrossT, FromT, GrainsizeT, IfT, InitT, InReductionT, - LastprivateT, LinearT, MapT, - NumTasksT, OrderT, ReductionT, - ScheduleT, TaskReductionT, ToT>; + LastprivateT, LinearT, LoopRangeT, + MapT, NumTasksT, OrderT, + ReductionT, ScheduleT, + TaskReductionT, ToT>; template using UnionClausesT = std::variant>; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 6352be8069e9e..989b35a7caa2a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -274,6 +274,9 @@ def OMPC_Linear : 
Clause<[Spelling<"linear">]> { def OMPC_Link : Clause<[Spelling<"link">]> { let flangClass = "OmpObjectList"; } +def OMPC_LoopRange : Clause<[Spelling<"looprange">]> { + let clangClass = "OMPLoopRangeClause"; +} def OMPC_Map : Clause<[Spelling<"map">]> { let clangClass = "OMPMapClause"; let flangClass = "OmpMapClause"; @@ -856,6 +859,9 @@ def OMP_For : Directive<[Spelling<"for">]> { let languages = [L_C]; } def OMP_Fuse : Directive<[Spelling<"fuse">]> { + let allowedOnceClauses = [ + VersionedClause + ]; let association = AS_Loop; let category = CA_Executable; } >From c76888b9dd1f516512d2d64afa4766adaad4de1e Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:30:39 +0000 Subject: [PATCH 03/12] Added fuse to documentation --- clang/docs/OpenMPSupport.rst | 2 ++ clang/docs/ReleaseNotes.rst | 1 + 2 files changed, 3 insertions(+) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 986aaabe1eed4..12325e3294a64 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,6 +376,8 @@ implementation. 
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop fuse transformation | :good:`done` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | task_iteration | :none:`unclaimed` | :none:`unclaimed` | | diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dd748ab06873d..c75cb25a4db73 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1128,6 +1128,7 @@ OpenMP Support - Fixed mapping of arrays of structs containing nested structs with user defined mappers, by using compiler-generated default mappers for the outer structs for such maps. +- Added support for 'omp fuse' directive. 
Improvements ^^^^^^^^^^^^ >From 1c054673983282e7e6afef0f11c2a7fbe39181d7 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:43:41 +0000 Subject: [PATCH 04/12] Refactored preinits handling and improved coverage --- clang/docs/OpenMPSupport.rst | 2 +- clang/include/clang/AST/StmtOpenMP.h | 5 +- clang/include/clang/Sema/SemaOpenMP.h | 96 +- clang/lib/AST/StmtOpenMP.cpp | 13 + clang/lib/Basic/OpenMPKinds.cpp | 3 +- clang/lib/CodeGen/CGExpr.cpp | 2 + clang/lib/CodeGen/CodeGenFunction.h | 4 + clang/lib/Sema/SemaOpenMP.cpp | 588 ++++--- clang/test/OpenMP/fuse_ast_print.cpp | 55 + clang/test/OpenMP/fuse_codegen.cpp | 2117 +++++++++++++++---------- 10 files changed, 1862 insertions(+), 1023 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 12325e3294a64..8d62208e55f75 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,7 +376,7 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop fuse transformation | :good:`done` | :none:`unclaimed` | | +| loop fuse transformation | :good:`prototyped` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | 
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index f5115afd0753e..6425f6616a558 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1005,8 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass || - C == OMPFuseDirectiveClass; + C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass; } }; @@ -5653,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_stripe, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + // Similar to Tile, it only generates a single top level loop nest + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 0c28aaf6ab21a..547ea95c6cd5d 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1494,16 +1494,96 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); - /// Analyzes and checks a loop sequence for use by a loop transformation + /// @brief Categories of loops encountered during semantic OpenMP loop + /// analysis + /// + /// This enumeration identifies the structural category of a loop or sequence + /// of loops analyzed in the context of OpenMP transformations and directives. + /// This categorization helps differentiate between original source loops + /// and the structures resulting from applying OpenMP loop transformations. 
+ enum class OMPLoopCategory { + + /// @var OMPLoopCategory::RegularLoop + /// Represents a standard canonical loop nest found in the + /// original source code or an intact loop after transformations + /// (i.e Post/Pre loops of a loopranged fusion) + RegularLoop, + + /// @var OMPLoopCategory::TransformSingleLoop + /// Represents the resulting loop structure when an OpenMP loop + // transformation, generates a single, top-level loop + TransformSingleLoop, + + /// @var OMPLoopCategory::TransformLoopSequence + /// Represents the resulting loop structure when an OpenMP loop + /// transformation + /// generates a sequence of two or more canonical loop nests + TransformLoopSequence + }; + + /// The main recursive process of `checkTransformableLoopSequence` that + /// performs grammatical parsing of a canonical loop sequence. It extracts + /// key information, such as the number of top-level loops, loop statements, + /// helper expressions, and other relevant loop-related data, all in a single + /// execution to avoid redundant traversals. This analysis flattens inner + /// Loop Sequences + /// + /// \param LoopSeqStmt The AST of the original statement. + /// \param LoopSeqSize [out] Number of top level canonical loops. + /// \param NumLoops [out] Number of total canonical loops (nested too). + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param ForStmts [out] The multiple Stmt of each For loop. 
+ /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformsPreInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. + /// \param Context + /// \param Kind The loop transformation directive kind. + /// \return Whether the original statement is both syntactically and + /// semantically correct according to OpenMP 6.0 canonical loop + /// sequence definition. + bool analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind); + + /// Validates and checks whether a loop sequence can be transformed according + /// to the given directive, providing necessary setup and initialization + /// (Driver function) before recursion using `analyzeLoopSequence`. /// /// \param Kind The loop transformation directive kind. - /// \param NumLoops [out] Number of total canonical loops - /// \param LoopSeqSize [out] Number of top level canonical loops + /// \param AStmt The AST of the original statement + /// \param LoopSeqSize [out] Number of top level canonical loops.
+ /// \param NumLoops [out] Number of total canonical loops (nested too) /// \param LoopHelpers [out] The multiple loop analyses results. - /// \param LoopStmts [out] The multiple Stmt of each For loop. - /// \param OriginalInits [out] The multiple collection of statements and + /// \param ForStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformsPreInits [out] The multiple collection of statements and /// declarations that must have been executed/declared - /// before entering the loop. + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. /// \param Context /// \return Whether there was an absence of errors or not bool checkTransformableLoopSequence( @@ -1512,7 +1592,9 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, SmallVectorImpl> &OriginalInits, - ASTContext &Context); + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. 
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index c5a6732cc2217..f527e6361b5e5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -457,6 +457,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); // The number of generated loops and loop nests during unroll matches + // given that unroll only generates top level canonical loop nests + // so each generated loop is a top level canonical loop nest Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); @@ -520,6 +522,17 @@ OMPFuseDirective *OMPFuseDirective::Create( NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); + // The number of top level canonical nests could + // not match the total number of generated loops + // Example: + // Before fusion: + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) + // A[i][j] = i + j; + // + // for (int k = 0; k < P; ++k) + // B[k] = k * 2; + // Here, NumLoopNests = 2, but NumLoops = 3. 
Dir->setNumGeneratedLoopNests(NumLoopNests); Dir->setNumGeneratedLoops(NumLoops); return Dir; diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 18330181f1509..53a9f80e6d3b7 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -704,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse; + DKind == OMPD_interchange || DKind == OMPD_stripe || + DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6cb348ffdf55f..08049d4d4e37d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3257,6 +3257,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // No other cases for now. } else { + llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n"; + VD->dumpColor(); llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index fe753e5b688b1..bfe24213ed377 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5363,6 +5363,10 @@ class CodeGenFunction : public CodeGenTypeCache { /// Set the address of a local variable. 
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { + if (LocalDeclMap.count(VD)) { + llvm::errs() << "Warning: VarDecl already exists in map: "; + VD->dumpColor(); + } assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3ec3f2ad31e78..3ce256f3ec23b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,6 +22,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -47,6 +48,7 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include +#include using namespace clang; using namespace llvm::omp; @@ -14194,6 +14196,45 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +// Overloaded base case function +template +static bool tryHandleAs(T *t, F &&) { + return false; +} + +/** + * Tries to recursively cast `t` to one of the given types and invokes `f` if successful. + * + * @tparam Class The first type to check. + * @tparam Rest The remaining types to check. + * @tparam T The base type of `t`. + * @tparam F The callable type for the function to invoke upon a successful cast. + * @param t The object to be checked. + * @param f The function to invoke if `t` matches `Class`. + * @return `true` if `t` matched any type and `f` was called, otherwise `false`. + */ +template +static bool tryHandleAs(T *t, F &&f) { + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } else { + return tryHandleAs(t, std::forward(f)); + } +} + +// Updates OriginalInits by checking Transform against loop transformation +// directives and appending their pre-inits if a match is found. 
+static void updatePreInits(OMPLoopBasedDirective *Transform, + SmallVectorImpl> &PreInits) { + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); +} + bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, @@ -14224,121 +14265,106 @@ bool SemaOpenMP::checkTransformableLoopNest( return false; }, [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + updatePreInits(Transform, OriginalInits); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } -class NestedLoopCounterVisitor - : public clang::RecursiveASTVisitor { +// Counts the total number of nested loops, including the outermost loop (the +// original loop). PRECONDITION of this visitor is that it must be invoked from +// the original loop to be analyzed. The traversal is stop for Decl's and +// Expr's given that they may contain inner loops that must not be counted. 
+// +// Example AST structure for the code: +// +// int main() { +// #pragma omp fuse +// { +// for (int i = 0; i < 100; i++) { <-- Outer loop +// []() { +// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +// }; +// for(int j = 0; j < 5; ++j) {} <-- Inner loop +// } +// for (int r = 0; i < 100; i++) { <-- Outer loop +// struct LocalClass { +// void bar() { +// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +// } +// }; +// for(int k = 0; k < 10; ++k) {} <-- Inner loop +// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +// } +// } +// } +// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { +private: + unsigned NestedLoopCount = 0; + public: - explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {} + explicit NestedLoopCounterVisitor() {} - bool VisitForStmt(clang::ForStmt *FS) { - ++NestedLoopCount; - return true; + unsigned getNestedLoopCount() const { return NestedLoopCount; } + + bool VisitForStmt(ForStmt *FS) override { + ++NestedLoopCount; + return true; } - bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) { - ++NestedLoopCount; - return true; + bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { + ++NestedLoopCount; + return true; } - unsigned getNestedLoopCount() const { return NestedLoopCount; } + bool TraverseStmt(Stmt *S) override { + if (!S) + return true; -private: - unsigned NestedLoopCount; + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. 
+ // Therefore must not be counted + if (isa(S)) + return true; + + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S) || isa(S) || + isa(S)) { + return DynamicRecursiveASTVisitor::TraverseStmt(S); + } + + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; + } + + bool TraverseDecl(Decl *D) override { + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; + } }; -bool SemaOpenMP::checkTransformableLoopSequence( - OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, - unsigned &NumLoops, +bool SemaOpenMP::analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, SmallVectorImpl> &OriginalInits, - ASTContext &Context) { + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind) { - // Checks whether the given statement is a compound statement VarsWithInheritedDSAType TmpDSA; - if (!isa(AStmt)) { - Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) - << getOpenMPDirectiveName(Kind); - return false; - } - // Callback for updating pre-inits in case there are even more - // loop-sequence-generating-constructs inside of the main compound stmt - auto OnTransformationCallback = - [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) 
- DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); - }; - - // Number of top level canonical loop nests observed (And acts as index) - LoopSeqSize = 0; - // Number of total observed loops - NumLoops = 0; - - // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows - // the grammar: - // - // canonical-loop-sequence: - // { - // loop-sequence+ - // } - // where loop-sequence can be any of the following: - // 1. canonical-loop-sequence - // 2. loop-nest - // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) - // - // To recognise and traverse this structure the following helper functions - // have been defined. handleLoopSequence serves as the recurisve entry point - // and tries to match the input AST to the canonical loop sequence grammar - // structure - - // Helper functions to validate canonical loop sequence grammar is valid - auto isLoopSequenceDerivation = [](auto *Child) { - return isa(Child) || isa(Child) || - isa(Child); - }; - auto isLoopGeneratingStmt = [](auto *Child) { - return isa(Child); - }; - + QualType BaseInductionVarType; // Helper Lambda to handle storing initialization and body statements for both // ForStmt and CXXForRangeStmt and checks for any possible mismatch between // induction variables types - QualType BaseInductionVarType; auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, this, &Context](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { @@ -14361,33 +14387,35 @@ bool SemaOpenMP::checkTransformableLoopSequence( } } } - } else { - assert(isa(LoopStmt) && - "Expected canonical for or range-based for loops."); - auto *CXXFor = dyn_cast(LoopStmt); + auto *CXXFor = cast(LoopStmt); OriginalInits.back().push_back(CXXFor->getBeginStmt()); ForStmts.push_back(CXXFor); } }; + // Helper lambda functions to encapsulate the processing of different // 
derivations of the canonical loop sequence grammar // // Modularized code for handling loop generation and transformations - auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers, - &OriginalInits, &LoopSeqSize, &NumLoops, Kind, - &TmpDSA, &OnTransformationCallback, - this](Stmt *Child) { + auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers, + &OriginalInits, &TransformsPreInits, + &LoopCategories, &LoopSeqSize, &NumLoops, Kind, + &TmpDSA, &ForStmts, &Context, + &LoopSequencePreInits, this](Stmt *Child) { auto LoopTransform = dyn_cast(Child); Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); - + unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); // Handle the case where transformed statement is not available due to // dependent contexts if (!TransformedStmt) { - if (NumGeneratedLoopNests > 0) + if (NumGeneratedLoopNests > 0) { + LoopSeqSize += NumGeneratedLoopNests; + NumLoops += NumGeneratedLoops; return true; - // Unroll full + } + // Unroll full (0 loops produced) else { Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); @@ -14400,38 +14428,56 @@ bool SemaOpenMP::checkTransformableLoopSequence( Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; - // Future loop transformations that generate multiple canonical loops - } else if (NumGeneratedLoopNests > 1) { - llvm_unreachable("Multiple canonical loop generating transformations " - "like loop splitting are not yet supported"); } + // Loop transformations such as split or loopranged fuse + else if (NumGeneratedLoopNests > 1) { + // Get the preinits related to this loop sequence generating + // loop transformation (i.e loopranged fuse, split...)
+ LoopSequencePreInits.emplace_back(); + // These preinits differ slightly from regular inits/pre-inits related + // to single loop generating loop transformations (interchange, unroll) + // given that they are not bounded to a particular loop nest + // so they need to be treated independently + updatePreInits(LoopTransform, LoopSequencePreInits); + return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops, + LoopHelpers, ForStmts, OriginalInits, + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context, Kind); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + else { + // Process the transformed loop statement + OriginalInits.emplace_back(); + TransformsPreInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, + *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(TransformedStmt); + updatePreInits(LoopTransform, TransformsPreInits); - // Process the transformed loop statement - Child = TransformedStmt; - OriginalInits.emplace_back(); - LoopHelpers.emplace_back(); - OnTransformationCallback(LoopTransform); - - unsigned IsCanonical = - checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, - TmpDSA, LoopHelpers[LoopSeqSize]); - - if (!IsCanonical) { - Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) - << getOpenMPDirectiveName(Kind); - return false; + NumLoops += NumGeneratedLoops; + ++LoopSeqSize; + return true; } - storeLoopStatements(TransformedStmt); - NumLoops += LoopTransform->getNumGeneratedLoops(); - return true; }; // Modularized code for handling regular canonical loops - auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, 
&NumLoops, Kind, &TmpDSA, - this](Stmt *Child) { + auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, + &LoopCategories, this](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::RegularLoop); + unsigned IsCanonical = checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); @@ -14449,57 +14495,114 @@ bool SemaOpenMP::checkTransformableLoopSequence( return true; }; - // Helper function to process a Loop Sequence Recursively - auto handleLoopSequence = [&](Stmt *LoopSeqStmt, - auto &handleLoopSequenceCallback) -> bool { - for (auto *Child : LoopSeqStmt->children()) { - if (!Child) - continue; + // Helper functions to validate canonical loop sequence grammar is valid + auto isLoopSequenceDerivation = [](auto *Child) { + return isa(Child) || isa(Child) || + isa(Child); + }; + auto isLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + - // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { - Child = Child->IgnoreContainers(); + // High level grammar validation + for (auto *Child : LoopSeqStmt->children()) { - // Ignore empty compound statement if (!Child) - continue; + continue; - // In the case of a nested loop sequence ignoring containers would not - // be enough, a recurisve transversal of the loop sequence is required - if (isa(Child)) { - if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback)) - return false; - // Already been treated, skip this children - continue; + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required + if (isa(Child)) { + if 
(!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers, + ForStmts, OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { + if (isLoopGeneratingStmt(Child)) { + if (!analyzeLoopGeneration(Child)) { + return false; } + // analyzeLoopGeneration updates Loop Sequence size accordingly + + } else { + if (!analyzeRegularLoop(Child)) { + return false; + } + // Update the Loop Sequence size by one + ++LoopSeqSize; } - // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { - if (isLoopGeneratingStmt(Child)) { - if (!handleLoopGeneration(Child)) { - return false; - } } else { - if (!handleRegularLoop(Child)) { - return false; - } + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; } - ++LoopSeqSize; - } else { - // Report error for invalid statement inside canonical loop sequence - Diag(Child->getBeginLoc(), diag::err_omp_not_for) - << 0 << getOpenMPDirectiveName(Kind); + } + return true; +} + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context) { + + // Checks whether the given statement is a compound statement + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); return false; - } - } - return true; - }; + } + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 
0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. analyzeLoopSequence serves as the recursive entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure. This function will perform both a semantic and syntactical + // analysis of the given statement according to OpenMP 6.0 definition of + // the aforementioned canonical loop sequence // Recursive entry point to process the main loop sequence - if (!handleLoopSequence(AStmt, handleLoopSequence)) { - return false; + if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, + OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) { + return false; } - if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) << getOpenMPDirectiveName(Kind); @@ -14531,9 +14634,7 @@ static void addLoopPreInits(ASTContext &Context, RangeEnd->getBeginLoc(), RangeEnd->getEndLoc())); } - llvm::append_range(PreInits, OriginalInit); - // List of OMPCapturedExprDecl, for __begin, __end, and NumIterations if (auto *PI = cast_or_null(LoopHelper.PreInits)) { PreInits.push_back(new (Context) DeclStmt( @@ -15214,7 +15315,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *LoopStmt = nullptr; collectLoopStmts(AStmt, {LoopStmt}); - // Determine the PreInit declarations.
+ // Determine the PreInit declarations. SmallVector PreInits; addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); @@ -15781,28 +15882,35 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!AStmt) { return StmtError(); } + + unsigned NumLoops = 1; + unsigned LoopSeqSize = 1; + + // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder 1 (even though + // using looprange fuse could yield up to 3 top level loop nests) + // because a dependent context could prevent determining its true value + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, LoopSeqSize, AStmt, nullptr, + nullptr); + } + // Validate that the potential loop sequence is transformable for fusion // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops SmallVector LoopHelpers; SmallVector LoopStmts; SmallVector> OriginalInits; - - unsigned NumLoops; - unsigned LoopSeqSize; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; + SmallVector LoopCategories; if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, - Context)) { + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context)) { return StmtError(); } - // Defer transformation in dependent contexts - // The NumLoopNests argument is set to a placeholder (0) - // because a dependent context could prevent determining its true value - if (CurrContext->isDependentContext()) { - return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, - NumLoops, 0, AStmt, nullptr, nullptr); - } - // Handle clauses, which can be any of the following: [looprange, apply] const OMPLoopRangeClause *LRC = OMPExecutableDirective::getSingleClause(Clauses); @@ -15864,11 +15972,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, "Expecting loop iteration space dimensionality to match
number of " "affected loops"); - // PreInits hold a sequence of variable declarations that must be executed - // before the fused loop begins. These include bounds, strides, and other - // helper variables required for the transformation. - SmallVector PreInits; - // Select the type with the largest bit width among all induction variables QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); for (unsigned int I = FirstVal; I < LastVal; ++I) { @@ -15880,7 +15983,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, uint64_t IVBitWidth = Context.getIntWidth(IVType); // Create pre-init declarations for all loops lower bounds, upper bounds, - // strides and num-iterations + // strides and num-iterations for every top level loop in the fusion SmallVector LBVarDecls; SmallVector STVarDecls; SmallVector NIVarDecls; @@ -15918,12 +16021,62 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, return std::make_pair(VD, DeclStmt); }; + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. Other loop transforms + // also contain their own preinits + SmallVector PreInits; + // Iterator to keep track of loop transformations + unsigned int TransformIndex = 0; + + // Update the general preinits using the preinits generated by loop sequence + // generating loop transformations. These preinits differ slightly from + // single-loop transformation preinits, as they can be detached from a + // specific loop inside the multiple generated loop nests. This happens + // because certain helper variables, like '.omp.fuse.max', are introduced to + // handle fused iteration spaces and may not be directly tied to a single + // original loop. the preinit structure must ensure that hidden variables + // like '.omp.fuse.max' are still properly handled. 
+ // Transformations that apply this concept: Loopranged Fuse, Split + if (!LoopSequencePreInits.empty()) { + for (const auto &LTPreInits : LoopSequencePreInits) { + if (!LTPreInits.empty()) { + llvm::append_range(PreInits, LTPreInits); + } + } + } + // Process each single loop to generate and collect declarations - // and statements for all helper expressions + // and statements for all helper expressions related to + // particular single loop nests + + // Also, in the case of the fused loops, we keep track of their original + // inits by appending them to their preinits statement, and in the case of + // transformations, also append their preinits (which contain the original + // loop initialization statement or other statements) + + // Firstly we need to update TransformIndex to match the beginning of the + // looprange section + for (unsigned int I = 0; I < FirstVal - 1; ++I) { + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + } for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { - addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], - PreInits); + if (LoopCategories[I] == OMPLoopCategory::RegularLoop) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + // For transformed loops, insert both pre-inits and original inits. + // Order matters: pre-inits may define variables used in the original + // inits such as upper bounds...
+ auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); @@ -15942,7 +16095,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, NIVarDecls.push_back(NIVD); IVVarDecls.push_back(IVVD); - PreInits.push_back(UBDStmt.get()); PreInits.push_back(LBDStmt.get()); PreInits.push_back(STDStmt.get()); PreInits.push_back(NIDStmt.get()); @@ -16118,6 +16270,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, BodyStmts.push_back(IdxExpr.get()); llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + // If the loop is a CXXForRangeStmt then the iterator variable is needed if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); @@ -16152,21 +16305,50 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), IncrExpr.get()->getEndLoc()); - // In the case of looprange, the result of fuse won't simply - // be a single loop (ForStmt), but rather a loop sequence - // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop - // and the post-fusion loops, preserving its original order. + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. 
+ // + // Note: If looprange clause produces a single fused loop nest then + // this compound statement wrapper is unnecessary (Therefore this + // treatment is skipped) + Stmt *FusionStmt = FusedForStmt; - if (LRC) { + if (LRC && CountVal != LoopSeqSize) { SmallVector FinalLoops; - // Gather all the pre-fusion loops - for (unsigned I = 0; I < FirstVal - 1; ++I) - FinalLoops.push_back(LoopStmts[I]); - // Gather the fused loop - FinalLoops.push_back(FusedForStmt); - // Gather all the post-fusion loops - for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I) + // Reset the transform index + TransformIndex = 0; + + // Collect all non-fused loops before and after the fused region. + // Pre-fusion and post-fusion loops are inserted in order exploiting their + // symmetry, along with their corresponding transformation pre-inits if + // needed. The fused loop is added between the two regions. + for (unsigned I = 0; I < LoopSeqSize; ++I) { + if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) { + // Update the Transformation counter to skip already treated + // loop transformations + if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + continue; + } + + // No need to handle: + // Regular loops: they are kept intact as-is. + // Loop-sequence-generating transformations: already handled earlier. 
+ // Only TransformSingleLoop requires inserting pre-inits here + + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + } + FinalLoops.push_back(LoopStmts[I]); + } + + FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt); FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), SourceLocation(), SourceLocation()); } diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp index ac4f0d38a9c68..9d85bd1172948 100644 --- a/clang/test/OpenMP/fuse_ast_print.cpp +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -338,6 +338,61 @@ void tfoo9() { foo9<1, 2>(); } +// PRINT-LABEL: void foo10( +// DUMP-LABEL: FunctionDecl {{.*}} foo10 +void foo10() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int ii = 0; ii < 10; ii += 2) + // DUMP: ForStmt + for (int ii = 0; ii < 10; ii += 2) + // PRINT: body(ii) + // DUMP: CallExpr + body(ii); + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + { + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int jj = 10; jj > 0; --jj) + // DUMP: ForStmt + for (int jj = 10; jj > 0; --jj) + // PRINT: body(jj) + // DUMP: CallExpr + body(jj); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + // PRINT: for (int kk = 0; kk <= 10; ++kk) + // DUMP: ForStmt 
+ for (int kk = 0; kk <= 10; ++kk) + // PRINT: body(kk) + // DUMP: CallExpr + body(kk); + } + } + +} diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp index d9500bed3ce31..742c280ed0172 100644 --- a/clang/test/OpenMP/fuse_codegen.cpp +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -65,6 +65,23 @@ extern "C" void foo4() { } } +// This exemplifies the usage of loop transformations that generate +// more than top level canonical loop nests (e.g split, loopranged fuse...) +extern "C" void foo5() { + double arr[256]; + #pragma omp fuse looprange(2,2) + { + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 512; ++k) body(k); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + #endif // CHECK1-LABEL: define dso_local void @body( @@ -88,7 +105,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -97,7 +113,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -129,107 +144,103 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: 
store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: 
[[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: br i1 [[CMP17]], label 
%[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] // CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] // CHECK1: [[IF_THEN22]]: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] // CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 
[[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] // CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]]) // CHECK1-NEXT: br label %[[IF_END27]] // CHECK1: [[IF_END27]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK1: [[FOR_END]]: @@ -256,7 +267,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -265,7 +275,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -274,7 +283,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTNEW_STEP21:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 @@ -304,172 +312,166 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: 
[[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// 
CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK1-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] // CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 // CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP]], label 
%[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] // CHECK1: [[COND_TRUE30]]: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 // CHECK1-NEXT: br label %[[COND_END32:.*]] // CHECK1: [[COND_FALSE31]]: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 // CHECK1-NEXT: br label %[[COND_END32]] // CHECK1: [[COND_END32]]: -// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] // CHECK1-NEXT: store i32 [[COND33]], ptr 
[[DOTOMP_FUSE_MAX]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] -// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 -// 
CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] -// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] // CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] // CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] // CHECK1: [[IF_THEN40]]: -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 
4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] // CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] -// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] // CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]]) +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP58]]) // CHECK1-NEXT: br label %[[IF_END45]] // CHECK1: [[IF_END45]]: -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] // CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] // CHECK1: [[IF_THEN47]]: -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] -// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] // CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] -// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 
[[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] // CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]]) // CHECK1-NEXT: br label %[[IF_END52]] // CHECK1: [[IF_END52]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: [[FOR_END]]: @@ -481,13 +483,11 @@ extern "C" void foo4() { // CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -497,48 +497,43 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, 
align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 @@ -565,225 +560,219 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 // CHECK1-NEXT: store i32 42, ptr [[C]], align 4 // CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 // CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 -// 
CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 // CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 -// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 -// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 -// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 -// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[ADD21:%.*]] 
= add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 // CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 -// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 -// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 -// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] -// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 -// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 -// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 -// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 -// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 -// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 -// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] -// CHECK1: [[COND_TRUE44]]: -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: br label %[[COND_END46:.*]] -// CHECK1: [[COND_FALSE45]]: -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: br label %[[COND_END46]] -// CHECK1: [[COND_END46]]: -// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] -// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: 
[[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] -// CHECK1: [[COND_TRUE50]]: -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: br label %[[COND_END52:.*]] -// CHECK1: [[COND_FALSE51]]: -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: br label %[[COND_END52]] -// CHECK1: [[COND_END52]]: -// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] -// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK1: [[COND_TRUE42]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: br label %[[COND_END44:.*]] +// CHECK1: [[COND_FALSE43]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END44]] +// CHECK1: [[COND_END44]]: +// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 
8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK1: [[COND_TRUE48]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50:.*]] +// CHECK1: [[COND_FALSE49]]: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50]] +// CHECK1: [[COND_END50]]: +// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 
[[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 -// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 -// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] -// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] -// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 -// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 -// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 -// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] -// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] -// CHECK1: [[IF_THEN64]]: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] -// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] -// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 -// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] -// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 -// CHECK1-NEXT: 
[[TMP48:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN62]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] -// CHECK1: [[IF_THEN70]]: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] -// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] -// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 -// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] -// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) -// CHECK1-NEXT: br label %[[IF_END75]] -// CHECK1: [[IF_END75]]: -// CHECK1-NEXT: br label %[[IF_END76]] -// CHECK1: [[IF_END76]]: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] -// CHECK1: [[IF_THEN78]]: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] -// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] -// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 -// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 -// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] -// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) -// CHECK1-NEXT: br label %[[IF_END83]] -// CHECK1: [[IF_END83]]: -// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] -// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] -// CHECK1: [[IF_THEN85]]: -// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] -// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] -// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 -// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 -// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] -// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) -// CHECK1-NEXT: br label %[[IF_END90]] -// CHECK1: [[IF_END90]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK1: [[IF_THEN68]]: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END73]] +// CHECK1: [[IF_END73]]: +// CHECK1-NEXT: br label %[[IF_END74]] +// CHECK1: [[IF_END74]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK1: [[IF_THEN76]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK1-NEXT: br label %[[IF_END81]] +// CHECK1: [[IF_END81]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK1: [[IF_THEN83]]: +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK1-NEXT: br label %[[IF_END88]] +// CHECK1: [[IF_END88]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 -// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: [[FOR_END]]: // CHECK1-NEXT: ret void @@ -794,13 +783,11 @@ extern "C" void foo4() { // CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -815,12 +802,10 @@ extern "C" void foo4() { // CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 @@ -940,6 +925,277 @@ extern "C" void foo4() { // CHECK1-NEXT: ret void // // +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], 
%[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK1: [[COND_TRUE24]]: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: br label %[[COND_END26:.*]] +// CHECK1: [[COND_FALSE25]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END26]] +// CHECK1: [[COND_END26]]: +// CHECK1-NEXT: [[COND27:%.*]] = 
phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30:.*]] +// CHECK1: [[FOR_COND30]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK1: [[FOR_BODY32]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV35:%.*]] = sext 
i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN41]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP37]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[IF_END53]] +// CHECK1: [[IF_END53]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK1: [[IF_THEN55]]: +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END60]] +// CHECK1: [[IF_END60]]: +// CHECK1-NEXT: br label %[[FOR_INC61:.*]] +// CHECK1: [[FOR_INC61]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: [[FOR_END63]]: +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70:.*]] +// CHECK1: [[FOR_COND70]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK1: [[FOR_BODY72]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK1-NEXT: br label %[[FOR_INC73:.*]] +// CHECK1: [[FOR_INC73]]: +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70]] +// CHECK1: [[FOR_END74]]: +// CHECK1-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @body( // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -961,7 +1217,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -970,7 +1225,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1002,107 +1256,103 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// 
CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK2-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK2: [[COND_TRUE]]: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK2-NEXT: 
br label %[[COND_END:.*]] // CHECK2: [[COND_FALSE]]: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK2-NEXT: br label %[[COND_END]] // CHECK2: [[COND_END]]: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul 
i32 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] // CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] // CHECK2: [[IF_THEN22]]: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] -// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] // CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 
[[TMP40]], [[TMP41]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] // CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]]) // CHECK2-NEXT: br label %[[IF_END27]] // CHECK2: [[IF_END27]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK2: [[FOR_END]]: @@ -1114,13 +1364,11 @@ extern "C" void foo4() { // CHECK2-NEXT: [[ENTRY:.*:]] // CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1130,48 +1378,43 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 -// 
CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, 
align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 @@ -1198,225 +1441,219 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr 
[[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 -// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 -// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 // CHECK2-NEXT: store i32 42, ptr [[C]], align 4 // CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 // CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 -// 
CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 // CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 -// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 -// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 -// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 -// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 -// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: [[ADD21:%.*]] 
= add nsw i64 [[TMP16]], 1 -// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 // CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 -// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 -// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 -// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 -// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 -// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] -// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 -// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 -// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 -// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 -// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 -// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 -// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] -// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] -// CHECK2: [[COND_TRUE44]]: -// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: br label %[[COND_END46:.*]] -// CHECK2: [[COND_FALSE45]]: -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: br label %[[COND_END46]] -// CHECK2: [[COND_END46]]: -// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] -// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: 
[[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] -// CHECK2: [[COND_TRUE50]]: -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: br label %[[COND_END52:.*]] -// CHECK2: [[COND_FALSE51]]: -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: br label %[[COND_END52]] -// CHECK2: [[COND_END52]]: -// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] -// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK2: [[COND_TRUE42]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: br label %[[COND_END44:.*]] +// CHECK2: [[COND_FALSE43]]: +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END44]] +// CHECK2: [[COND_END44]]: +// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 
8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK2: [[COND_TRUE48]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50:.*]] +// CHECK2: [[COND_FALSE49]]: +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50]] +// CHECK2: [[COND_END50]]: +// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 
[[TMP33]], [[TMP34]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 -// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 -// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 -// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] -// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] -// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 -// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 -// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 -// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] -// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] -// CHECK2: [[IF_THEN64]]: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] -// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] -// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 -// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] -// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 -// CHECK2-NEXT: 
[[TMP48:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN62]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] -// CHECK2: [[IF_THEN70]]: -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] -// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] -// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 -// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] -// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) -// CHECK2-NEXT: br label %[[IF_END75]] -// CHECK2: [[IF_END75]]: -// CHECK2-NEXT: br label %[[IF_END76]] -// CHECK2: [[IF_END76]]: -// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] -// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] -// CHECK2: [[IF_THEN78]]: -// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 -// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 -// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] -// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] -// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 -// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 -// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 -// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] -// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 -// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 -// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) -// CHECK2-NEXT: br label %[[IF_END83]] -// CHECK2: [[IF_END83]]: -// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] -// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] -// CHECK2: [[IF_THEN85]]: -// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 -// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 -// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] -// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] -// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 -// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 -// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 -// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] -// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 -// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 -// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 -// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) -// CHECK2-NEXT: br label %[[IF_END90]] -// CHECK2: [[IF_END90]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK2: [[IF_THEN68]]: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END73]] +// CHECK2: [[IF_END73]]: +// CHECK2-NEXT: br label %[[IF_END74]] +// CHECK2: [[IF_END74]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK2: [[IF_THEN76]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK2-NEXT: br label %[[IF_END81]] +// CHECK2: [[IF_END81]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK2: [[IF_THEN83]]: +// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK2-NEXT: br label %[[IF_END88]] +// CHECK2: [[IF_END88]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 -// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void @@ -1427,13 +1664,11 @@ extern "C" void foo4() { // CHECK2-NEXT: [[ENTRY:.*:]] // CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1448,12 +1683,10 @@ extern "C" void foo4() { // CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK2-NEXT: store i32 63, ptr 
[[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 @@ -1573,6 +1806,277 @@ extern "C" void foo4() { // CHECK2-NEXT: ret void // // +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// 
CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK2: [[COND_TRUE24]]: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: br label %[[COND_END26:.*]] +// CHECK2: [[COND_FALSE25]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END26]] +// CHECK2: 
[[COND_END26]]: +// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30:.*]] +// CHECK2: [[FOR_COND30]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK2: [[FOR_BODY32]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN41]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP37]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[IF_END53]] +// CHECK2: [[IF_END53]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK2: [[IF_THEN55]]: +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END60]] +// CHECK2: [[IF_END60]]: +// CHECK2-NEXT: br label %[[FOR_INC61:.*]] +// CHECK2: [[FOR_INC61]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: [[FOR_END63]]: +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70:.*]] +// CHECK2: [[FOR_COND70]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK2: [[FOR_BODY72]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK2-NEXT: br label %[[FOR_INC73:.*]] +// CHECK2: [[FOR_INC73]]: +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70]] +// CHECK2: [[FOR_END74]]: +// CHECK2-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @tfoo2( // CHECK2-SAME: ) #[[ATTR0]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1593,7 +2097,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -1602,7 +2105,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1611,7 +2113,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 @@ -1641,174 +2142,168 @@ extern 
"C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr 
[[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], 
align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 
[[SUB24]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] // CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 // CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK2: [[COND_TRUE]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK2-NEXT: br label %[[COND_END:.*]] // CHECK2: [[COND_FALSE]]: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] 
= load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK2-NEXT: br label %[[COND_END]] // CHECK2: [[COND_END]]: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] // CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] // CHECK2: [[COND_TRUE30]]: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 // CHECK2-NEXT: br label %[[COND_END32:.*]] // CHECK2: [[COND_FALSE31]]: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 // CHECK2-NEXT: br label %[[COND_END32]] // CHECK2: [[COND_END32]]: -// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] // CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// 
CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] -// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] -// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] // CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] // CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] // CHECK2: [[IF_THEN40]]: -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] -// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] // CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] -// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] // CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]]) +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]]) // CHECK2-NEXT: br label %[[IF_END45]] // CHECK2: [[IF_END45]]: -// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] // CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] // CHECK2: [[IF_THEN47]]: -// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 -// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] -// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = 
load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] // CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 -// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] -// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] // CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 -// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP67]]) // CHECK2-NEXT: br label %[[IF_END52]] // CHECK2: [[IF_END52]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void // @@ -1819,6 +2314,8 @@ extern "C" void foo4() { // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} // CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} // CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} //. // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} @@ -1826,4 +2323,6 @@ extern "C" void foo4() { // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} // CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} // CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} //. 
>From 860fcd94d930c9644b4d0427471f2873e7afcf8b Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:44:48 +0000 Subject: [PATCH 05/12] Fixed missing diagnostic groups in warnings --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 002aa7a774fbe..e85cd32d78b5c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11613,7 +11613,8 @@ def note_omp_implicit_dsa : Note< def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; def warn_omp_different_loop_ind_var_types : Warning < - "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">; + "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">, + InGroup; def err_omp_not_canonical_loop : Error < "loop after '#pragma omp %0' is not in canonical form">; def err_omp_not_a_loop_sequence : Error < @@ -11624,7 +11625,8 @@ def err_omp_invalid_looprange : Error < "loop range in '#pragma omp %0' exceeds the number of available loops: " "range end '%1' is greater than the total number of loops '%2'">; def warn_omp_redundant_fusion : Warning < - "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">; + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, + InGroup; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; >From 65cbfeb945e6b8016696906db43dd590adb285b2 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:49:50 +0000 Subject: [PATCH 06/12] Fixed formatting and comments --- 
clang/lib/Sema/SemaOpenMP.cpp | 112 ++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3ce256f3ec23b..2985b256cf153 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14197,42 +14197,43 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( } // Overloaded base case function -template -static bool tryHandleAs(T *t, F &&) { - return false; +template static bool tryHandleAs(T *t, F &&) { + return false; } /** - * Tries to recursively cast `t` to one of the given types and invokes `f` if successful. + * Tries to recursively cast `t` to one of the given types and invokes `f` if + * successful. * * @tparam Class The first type to check. * @tparam Rest The remaining types to check. * @tparam T The base type of `t`. - * @tparam F The callable type for the function to invoke upon a successful cast. + * @tparam F The callable type for the function to invoke upon a successful + * cast. * @param t The object to be checked. * @param f The function to invoke if `t` matches `Class`. * @return `true` if `t` matched any type and `f` was called, otherwise `false`. */ template static bool tryHandleAs(T *t, F &&f) { - if (Class *c = dyn_cast(t)) { - f(c); - return true; - } else { - return tryHandleAs(t, std::forward(f)); - } + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } else { + return tryHandleAs(t, std::forward(f)); + } } // Updates OriginalInits by checking Transform against loop transformation // directives and appending their pre-inits if a match is found. 
static void updatePreInits(OMPLoopBasedDirective *Transform, SmallVectorImpl> &PreInits) { - if (!tryHandleAs( - Transform, [&PreInits](auto *Dir) { - appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); - })) - llvm_unreachable("Unhandled loop transformation"); + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); } bool SemaOpenMP::checkTransformableLoopNest( @@ -14310,43 +14311,42 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { unsigned getNestedLoopCount() const { return NestedLoopCount; } bool VisitForStmt(ForStmt *FS) override { - ++NestedLoopCount; - return true; + ++NestedLoopCount; + return true; } bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { - ++NestedLoopCount; - return true; + ++NestedLoopCount; + return true; } bool TraverseStmt(Stmt *S) override { - if (!S) + if (!S) return true; - // Skip traversal of all expressions, including special cases like - // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions - // may contain inner statements (and even loops), but they are not part - // of the syntactic body of the surrounding loop structure. - // Therefore must not be counted - if (isa(S)) + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. 
+ // Therefore must not be counted + if (isa(S)) return true; - // Only recurse into CompoundStmt (block {}) and loop bodies - if (isa(S) || isa(S) || - isa(S)) { + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S) || isa(S) || isa(S)) { return DynamicRecursiveASTVisitor::TraverseStmt(S); - } + } - // Stop traversal of the rest of statements, that break perfect - // loop nesting, such as control flow (IfStmt, SwitchStmt...) - return true; + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; } bool TraverseDecl(Decl *D) override { - // Stop in the case of finding a declaration, it is not important - // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, - // FunctionDecl...) - return true; + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; } }; @@ -14504,15 +14504,14 @@ bool SemaOpenMP::analyzeLoopSequence( return isa(Child); }; - // High level grammar validation for (auto *Child : LoopSeqStmt->children()) { - if (!Child) + if (!Child) continue; - // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { Child = Child->IgnoreContainers(); // Ignore empty compound statement @@ -14530,9 +14529,9 @@ bool SemaOpenMP::analyzeLoopSequence( // Already been treated, skip this children continue; } - } - // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { if (isLoopGeneratingStmt(Child)) { if (!analyzeLoopGeneration(Child)) { return false; @@ -14546,12 +14545,12 @@ bool SemaOpenMP::analyzeLoopSequence( // Update the Loop Sequence size by one ++LoopSeqSize; } - } else { + } else { // Report error for invalid 
statement inside canonical loop sequence Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; - } + } } return true; } @@ -14568,9 +14567,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Checks whether the given statement is a compound statement if (!isa(AStmt)) { - Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) - << getOpenMPDirectiveName(Kind); - return false; + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; } // Number of top level canonical loop nests observed (And acts as index) LoopSeqSize = 0; @@ -14601,7 +14600,7 @@ bool SemaOpenMP::checkTransformableLoopSequence( OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind)) { - return false; + return false; } if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) @@ -15315,7 +15314,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *LoopStmt = nullptr; collectLoopStmts(AStmt, {LoopStmt}); - // Determine the PreInit declarations.e + // Determine the PreInit declarations. SmallVector PreInits; addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); @@ -15931,13 +15930,18 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CountVal = CountInt.getZExtValue(); }; - // Checks if the loop range is valid + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, unsigned NumLoops) -> bool { return FirstVal + CountVal - 1 <= NumLoops; }; uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + // Validates the loop range after evaluating the semantic information + // and ensures that the range is valid for the given loop sequence size. 
+ // Expressions are evaluated at compile time to obtain constant values. if (LRC) { EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, CountVal); >From b0fb1b3e26f1d9ceaac4495dcfad84f54f96d2a2 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:58:54 +0000 Subject: [PATCH 07/12] Added minimal changes to enable flang future implementation --- flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 9 +++++++++ flang/lib/Lower/OpenMP/Clauses.cpp | 5 +++++ flang/lib/Lower/OpenMP/Clauses.h | 1 + flang/lib/Parser/openmp-parsers.cpp | 7 +++++++ flang/lib/Parser/unparse.cpp | 7 +++++++ flang/lib/Semantics/check-omp-structure.cpp | 9 +++++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 8 files changed, 40 insertions(+) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index e3eed6aed8079..76aa3f7b90156 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -600,6 +600,7 @@ class ParseTreeDumper { NODE(OmpLinearClause, Modifier) NODE(parser, OmpLinearModifier) NODE_ENUM(OmpLinearModifier, Value) + NODE(parser, OmpLoopRangeClause) NODE(parser, OmpStepComplexModifier) NODE(parser, OmpStepSimpleModifier) NODE(parser, OmpLoopDirective) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 61f97b855b0e5..d32db62db2628 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4367,6 +4367,15 @@ struct OmpLinearClause { std::tuple t; }; +// Ref: [6.0:207-208] +// +// loop-range-clause -> +// LOOPRANGE(first, count) // since 6.0 +struct OmpLoopRangeClause { + TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause); + std::tuple t; +}; + // Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] // // map-clause -> diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 
b599d69a36272..a38249bf2b588 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -997,6 +997,11 @@ Link make(const parser::OmpClause::Link &inp, return Link{/*List=*/makeObjects(inp.v, semaCtx)}; } +LoopRange make(const parser::OmpClause::Looprange &inp, + semantics::SemanticsContext &semaCtx) { + llvm_unreachable("Unimplemented: looprange"); +} + Map make(const parser::OmpClause::Map &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpMapClause diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index d7ab21d428e32..bda8571e65f23 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT; using InReduction = tomp::clause::InReductionT; using IsDevicePtr = tomp::clause::IsDevicePtrT; using Lastprivate = tomp::clause::LastprivateT; +using LoopRange = tomp::clause::LoopRangeT; using Linear = tomp::clause::LinearT; using Link = tomp::clause::LinkT; using Map = tomp::clause::MapT; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index c55642d969503..d53389746dbec 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -847,6 +847,11 @@ TYPE_PARSER( maybe(":"_tok >> nonemptyList(Parser{})), /*PostModified=*/pure(true))) +TYPE_PARSER( + construct(scalarIntConstantExpr, + "," >> scalarIntConstantExpr) +) + // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1020,6 +1025,8 @@ TYPE_PARSER( // parenthesized(Parser{}))) || "LINK" >> construct(construct( parenthesized(Parser{}))) || + "LOOPRANGE" >> construct(construct( + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ed0f227fd5b98..18e8a63ca68aa 100644 --- 
a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2318,6 +2318,13 @@ class UnparseVisitor { } } } + void Unparse(const OmpLoopRangeClause &x) { + Word("LOOPRANGE("); + Walk(std::get<0>(x.t)); + Put(", "); + Walk(std::get<1>(x.t)); + Put(")"); + } void Unparse(const OmpReductionClause &x) { using Modifier = OmpReductionClause::Modifier; Walk(std::get>>(x.t), ": "); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 68cea6739830d..2a03c6a1fd0e4 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4446,6 +4446,15 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) +void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { + context_.Say(GetContext().clauseSource, + "LOOPRANGE clause is not implemented yet"_err_en_US, + ContextDirectiveAsFortran()); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) { + context_.Say(GetContext().clauseSource, + "FREE_AGENT clause is not implemented yet"_err_en_US, // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 989b35a7caa2a..f8acdc62aba3d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -276,6 +276,7 @@ def OMPC_Link : Clause<[Spelling<"link">]> { } def OMPC_LoopRange : Clause<[Spelling<"looprange">]> { let clangClass = "OMPLoopRangeClause"; + let flangClass = "OmpLoopRangeClause"; } def OMPC_Map : Clause<[Spelling<"map">]> { let clangClass = "OMPMapClause"; >From b252aa910ef7c5c278a86bd7195bbf3bb18dd18d Mon Sep 17 00:00:00 2001 From: eZWALT Date: Wed, 21 May 2025 13:14:22 +0000 Subject: [PATCH 08/12] Address basic PR feedback --- clang/include/clang/AST/OpenMPClause.h | 93 ++++---- clang/include/clang/AST/StmtOpenMP.h | 2 +- clang/include/clang/Sema/SemaOpenMP.h | 14 +- clang/lib/AST/OpenMPClause.cpp | 17 +- clang/lib/CodeGen/CGExpr.cpp | 5 +- clang/lib/CodeGen/CodeGenFunction.h | 4 - clang/lib/Sema/SemaOpenMP.cpp | 224 +++++++++----------- flang/lib/Semantics/check-omp-structure.cpp | 3 - 8 files changed, 166 insertions(+), 196 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 8f937cdef9cd0..3df5133a17fb4 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1153,82 +1153,73 @@ class OMPFullClause final : public OMPNoChildClause { /// for(int j = 0; j < 256; j+=2) /// for(int k = 127; k >= 0; --k) /// \endcode -class OMPLoopRangeClause final : public OMPClause { +class OMPLoopRangeClause final + : public OMPClause, + private llvm::TrailingObjects { friend class OMPClauseReader; - - explicit OMPLoopRangeClause() - : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + friend class llvm::TrailingObjects; /// Location of '(' SourceLocation LParenLoc; - /// Location of 'first' - SourceLocation FirstLoc; - - /// Location of 'count' - SourceLocation CountLoc; - - /// Expr associated with 'first' argument - Expr 
*First = nullptr; - - /// Expr associated with 'count' argument - Expr *Count = nullptr; - - /// Set 'first' - void setFirst(Expr *First) { this->First = First; } + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; - /// Set 'count' - void setCount(Expr *Count) { this->Count = Count; } + /// Number of looprange arguments (always 2: first, count) + unsigned NumArgs = 2; - /// Set location of '('. - void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } - - /// Set location of 'first' argument - void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + /// Set the argument expressions. + void setArgs(ArrayRef Args) { + assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments"); + std::copy(Args.begin(), Args.end(), getTrailingObjects()); + } - /// Set location of 'count' argument - void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {} public: - /// Build an AST node for a 'looprange' clause - /// - /// \param StartLoc Starting location of the clause. - /// \param LParenLoc Location of '('. - /// \param ModifierLoc Modifier location. - /// \param + /// Build a 'looprange' clause AST node. static OMPLoopRangeClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, - SourceLocation EndLoc, Expr *First, Expr *Count); + SourceLocation EndLoc, ArrayRef Args); - /// Build an empty 'looprange' node for deserialization - /// - /// \param C Context of the AST. + /// Build an empty 'looprange' clause node. 
static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); - /// Returns the location of '(' + // Location getters/setters SourceLocation getLParenLoc() const { return LParenLoc; } - - /// Returns the location of 'first' SourceLocation getFirstLoc() const { return FirstLoc; } - - /// Returns the location of 'count' SourceLocation getCountLoc() const { return CountLoc; } - /// Returns the argument 'first' or nullptr if not set - Expr *getFirst() const { return cast_or_null(First); } + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } - /// Returns the argument 'count' or nullptr if not set - Expr *getCount() const { return cast_or_null(Count); } + /// Get looprange arguments: first and count + Expr *getFirst() const { return getArgs()[0]; } + Expr *getCount() const { return getArgs()[1]; } - child_range children() { - return child_range(reinterpret_cast(&First), - reinterpret_cast(&Count) + 1); + /// Set looprange arguments: first and count + void setFirst(Expr *E) { getArgs()[0] = E; } + void setCount(Expr *E) { getArgs()[1] = E; } + + MutableArrayRef getArgs() { + return MutableArrayRef(getTrailingObjects(), NumArgs); + } + ArrayRef getArgs() const { + return ArrayRef(getTrailingObjects(), NumArgs); } + child_range children() { + return child_range(reinterpret_cast(getArgs().begin()), + reinterpret_cast(getArgs().end())); + } const_child_range children() const { - auto Children = const_cast(this)->children(); - return const_child_range(Children.begin(), Children.end()); + auto AR = getArgs(); + return const_child_range(reinterpret_cast(AR.begin()), + reinterpret_cast(AR.end())); } child_range used_children() { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 6425f6616a558..0421c06245cac 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5883,7 
+5883,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { EndLoc, NumLoops) { // Interchange produces a single top-level canonical loop // nest, with the exact same amount of total loops - setNumGeneratedLoops(NumLoops); + setNumGeneratedLoops(3 * NumLoops); setNumGeneratedLoopNests(1); } diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 547ea95c6cd5d..f848c4a7d715e 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1492,7 +1492,7 @@ class SemaOpenMP : public SemaBase { bool checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits); + Stmt *&Body, SmallVectorImpl> &OriginalInits); /// @brief Categories of loops encountered during semantic OpenMP loop /// analysis @@ -1555,9 +1555,9 @@ class SemaOpenMP : public SemaBase { Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context, OpenMPDirectiveKind Kind); @@ -1591,9 +1591,9 @@ class SemaOpenMP : public SemaBase { unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 
0b5808eb100e4..e0570262b2a05 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1026,22 +1026,25 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { OMPLoopRangeClause * OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc, - SourceLocation FirstLoc, SourceLocation CountLoc, - Expr *First, Expr *Count) { + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + ArrayRef Args) { + + assert(Args.size() == 2 && + "looprange clause must have exactly two arguments"); OMPLoopRangeClause *Clause = CreateEmpty(C); Clause->setLocStart(StartLoc); Clause->setLParenLoc(LParenLoc); - Clause->setLocEnd(EndLoc); Clause->setFirstLoc(FirstLoc); Clause->setCountLoc(CountLoc); - Clause->setFirst(First); - Clause->setCount(Count); + Clause->setLocEnd(EndLoc); + Clause->setArgs(Args); return Clause; } OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { - return new (C) OMPLoopRangeClause(); + void *Mem = C.Allocate(totalSizeToAlloc(2)); + return new (Mem) OMPLoopRangeClause(); } OMPAllocateClause *OMPAllocateClause::Create( diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 08049d4d4e37d..f983b88eb61ec 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3256,11 +3256,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); // No other cases for now. - } else { - llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n"; - VD->dumpColor(); + } else llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); - } // Handle threadlocal function locals. 
if (VD->getTLSKind() != VarDecl::TLS_None) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index bfe24213ed377..fe753e5b688b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5363,10 +5363,6 @@ class CodeGenFunction : public CodeGenTypeCache { /// Set the address of a local variable. void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { - if (LocalDeclMap.count(VD)) { - llvm::errs() << "Warning: VarDecl already exists in map: "; - VD->dumpColor(); - } assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 2985b256cf153..9819dcfe60360 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14196,38 +14196,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -// Overloaded base case function +/// Overloaded base case function template static bool tryHandleAs(T *t, F &&) { return false; } -/** - * Tries to recursively cast `t` to one of the given types and invokes `f` if - * successful. - * - * @tparam Class The first type to check. - * @tparam Rest The remaining types to check. - * @tparam T The base type of `t`. - * @tparam F The callable type for the function to invoke upon a successful - * cast. - * @param t The object to be checked. - * @param f The function to invoke if `t` matches `Class`. - * @return `true` if `t` matched any type and `f` was called, otherwise `false`. - */ +/// +/// Tries to recursively cast `t` to one of the given types and invokes `f` if +/// successful. +/// +/// @tparam Class The first type to check. +/// @tparam Rest The remaining types to check. +/// @tparam T The base type of `t`. +/// @tparam F The callable type for the function to invoke upon a successful +/// cast. 
+/// @param t The object to be checked. +/// @param f The function to invoke if `t` matches `Class`. +/// @return `true` if `t` matched any type and `f` was called, otherwise +/// `false`. template static bool tryHandleAs(T *t, F &&f) { if (Class *c = dyn_cast(t)) { f(c); return true; - } else { - return tryHandleAs(t, std::forward(f)); } + return tryHandleAs(t, std::forward(f)); } -// Updates OriginalInits by checking Transform against loop transformation -// directives and appending their pre-inits if a match is found. +/// Updates OriginalInits by checking Transform against loop transformation +/// directives and appending their pre-inits if a match is found. static void updatePreInits(OMPLoopBasedDirective *Transform, - SmallVectorImpl> &PreInits) { + SmallVectorImpl> &PreInits) { if (!tryHandleAs( Transform, [&PreInits](auto *Dir) { @@ -14239,7 +14238,7 @@ static void updatePreInits(OMPLoopBasedDirective *Transform, bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits) { + Stmt *&Body, SmallVectorImpl> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -14273,40 +14272,40 @@ bool SemaOpenMP::checkTransformableLoopNest( return Result; } -// Counts the total number of nested loops, including the outermost loop (the -// original loop). PRECONDITION of this visitor is that it must be invoked from -// the original loop to be analyzed. The traversal is stop for Decl's and -// Expr's given that they may contain inner loops that must not be counted. 
-// -// Example AST structure for the code: -// -// int main() { -// #pragma omp fuse -// { -// for (int i = 0; i < 100; i++) { <-- Outer loop -// []() { -// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP -// }; -// for(int j = 0; j < 5; ++j) {} <-- Inner loop -// } -// for (int r = 0; i < 100; i++) { <-- Outer loop -// struct LocalClass { -// void bar() { -// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP -// } -// }; -// for(int k = 0; k < 10; ++k) {} <-- Inner loop -// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP -// } -// } -// } -// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +/// Counts the total number of nested loops, including the outermost loop (the +/// original loop). PRECONDITION of this visitor is that it must be invoked from +/// the original loop to be analyzed. The traversal is stop for Decl's and +/// Expr's given that they may contain inner loops that must not be counted. +/// +/// Example AST structure for the code: +/// +/// int main() { +/// #pragma omp fuse +/// { +/// for (int i = 0; i < 100; i++) { <-- Outer loop +/// []() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// }; +/// for(int j = 0; j < 5; ++j) {} <-- Inner loop +/// } +/// for (int r = 0; i < 100; i++) { <-- Outer loop +/// struct LocalClass { +/// void bar() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// } +/// }; +/// for(int k = 0; k < 10; ++k) {} <-- Inner loop +/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +/// } +/// } +/// } +/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { private: unsigned NestedLoopCount = 0; public: - explicit NestedLoopCounterVisitor() {} + explicit NestedLoopCounterVisitor() = default; unsigned getNestedLoopCount() const { return NestedLoopCount; } @@ -14333,7 +14332,7 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { return true; // Only recurse into CompoundStmt 
(block {}) and loop bodies - if (isa(S) || isa(S) || isa(S)) { + if (isa(S)) { return DynamicRecursiveASTVisitor::TraverseStmt(S); } @@ -14354,19 +14353,18 @@ bool SemaOpenMP::analyzeLoopSequence( Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context, OpenMPDirectiveKind Kind) { VarsWithInheritedDSAType TmpDSA; QualType BaseInductionVarType; - // Helper Lambda to handle storing initialization and body statements for both - // ForStmt and CXXForRangeStmt and checks for any possible mismatch between - // induction variables types - auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, - this, &Context](Stmt *LoopStmt) { + /// Helper Lambda to handle storing initialization and body statements for + /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch + /// between induction variables types + auto StoreLoopStatements = [&](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { OriginalInits.back().push_back(For->getInit()); ForStmts.push_back(For); @@ -14394,16 +14392,11 @@ bool SemaOpenMP::analyzeLoopSequence( } }; - // Helper lambda functions to encapsulate the processing of different - // derivations of the canonical loop sequence grammar - // - // Modularized code for handling loop generation and transformations - auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers, - &OriginalInits, &TransformsPreInits, - &LoopCategories, &LoopSeqSize, &NumLoops, Kind, - &TmpDSA, &ForStmts, &Context, - &LoopSequencePreInits, this](Stmt *Child) { - auto LoopTransform = dyn_cast(Child); + /// Helper lambda functions to encapsulate the processing of different + /// derivations of 
the canonical loop sequence grammar + /// Modularized code for handling loop generation and transformations + auto AnalyzeLoopGeneration = [&](Stmt *Child) { + auto *LoopTransform = dyn_cast(Child); Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); @@ -14414,9 +14407,8 @@ bool SemaOpenMP::analyzeLoopSequence( LoopSeqSize += NumGeneratedLoopNests; NumLoops += NumGeneratedLoops; return true; - } - // Unroll full (0 loops produced) - else { + } else { + // Unroll full (0 loops produced) Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; @@ -14443,9 +14435,8 @@ bool SemaOpenMP::analyzeLoopSequence( LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind); - } - // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) - else { + } else { + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) // Process the transformed loop statement OriginalInits.emplace_back(); TransformsPreInits.emplace_back(); @@ -14461,7 +14452,7 @@ bool SemaOpenMP::analyzeLoopSequence( << getOpenMPDirectiveName(Kind); return false; } - storeLoopStatements(TransformedStmt); + StoreLoopStatements(TransformedStmt); updatePreInits(LoopTransform, TransformsPreInits); NumLoops += NumGeneratedLoops; @@ -14470,10 +14461,8 @@ bool SemaOpenMP::analyzeLoopSequence( } }; - // Modularized code for handling regular canonical loops - auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, &NumLoops, Kind, &TmpDSA, - &LoopCategories, this](Stmt *Child) { + /// Modularized code for handling regular canonical loops + auto AnalyzeRegularLoop = [&](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); LoopCategories.push_back(OMPLoopCategory::RegularLoop); @@ -14488,19 
+14477,19 @@ bool SemaOpenMP::analyzeLoopSequence( return false; } - storeLoopStatements(Child); + StoreLoopStatements(Child); auto NLCV = NestedLoopCounterVisitor(); NLCV.TraverseStmt(Child); NumLoops += NLCV.getNestedLoopCount(); return true; }; - // Helper functions to validate canonical loop sequence grammar is valid - auto isLoopSequenceDerivation = [](auto *Child) { - return isa(Child) || isa(Child) || - isa(Child); + /// Helper functions to validate loop sequence grammar derivations + auto IsLoopSequenceDerivation = [](auto *Child) { + return isa(Child); }; - auto isLoopGeneratingStmt = [](auto *Child) { + /// Helper functions to validate loop generating grammar derivations + auto IsLoopGeneratingStmt = [](auto *Child) { return isa(Child); }; @@ -14511,7 +14500,7 @@ bool SemaOpenMP::analyzeLoopSequence( continue; // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { + if (!IsLoopSequenceDerivation(Child)) { Child = Child->IgnoreContainers(); // Ignore empty compound statement @@ -14531,17 +14520,17 @@ bool SemaOpenMP::analyzeLoopSequence( } } // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { - if (isLoopGeneratingStmt(Child)) { - if (!analyzeLoopGeneration(Child)) { + if (IsLoopSequenceDerivation(Child)) { + if (IsLoopGeneratingStmt(Child)) { + if (!AnalyzeLoopGeneration(Child)) return false; - } - // analyzeLoopGeneration updates Loop Sequence size accordingly + + // AnalyzeLoopGeneration updates Loop Sequence size accordingly } else { - if (!analyzeRegularLoop(Child)) { + if (!AnalyzeRegularLoop(Child)) return false; - } + // Update the Loop Sequence size by one ++LoopSeqSize; } @@ -14560,9 +14549,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> 
&TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context) { // Checks whether the given statement is a compound statement @@ -14598,10 +14587,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Recursive entry point to process the main loop sequence if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, - LoopSequencePreInits, LoopCategories, Context, - Kind)) { + LoopSequencePreInits, LoopCategories, Context, Kind)) return false; - } + if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) << getOpenMPDirectiveName(Kind); @@ -14693,7 +14681,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14970,7 +14958,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef Clauses, // Verify and diagnose loop nest. 
SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15231,7 +15219,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15499,7 +15487,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15691,7 +15679,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Verify and diagnose loop nest. 
SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 2> OriginalInits; + SmallVector, 2> OriginalInits; if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15878,9 +15866,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CaptureVars CopyTransformer(SemaRef); // Ensure the structured block is not empty - if (!AStmt) { + if (!AStmt) return StmtError(); - } unsigned NumLoops = 1; unsigned LoopSeqSize = 1; @@ -15899,16 +15886,15 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops SmallVector LoopHelpers; SmallVector LoopStmts; - SmallVector> OriginalInits; - SmallVector> TransformsPreInits; - SmallVector> LoopSequencePreInits; + SmallVector> OriginalInits; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; SmallVector LoopCategories; if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, - LoopCategories, Context)) { + LoopCategories, Context)) return StmtError(); - } // Handle clauses, which can be any of the following: [looprange, apply] const OMPLoopRangeClause *LRC = @@ -15998,9 +15984,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // expressions. Generates both the variable declaration and the corresponding // initialization statement. 
auto CreateHelperVarAndStmt = - [&SemaRef = this->SemaRef, &Context, &CopyTransformer, - &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I, - bool NeedsNewVD = false) { + [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, + unsigned I, bool NeedsNewVD = false) { Expr *TransformedExpr = AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); if (!TransformedExpr) @@ -16044,9 +16029,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Transformations that apply this concept: Loopranged Fuse, Split if (!LoopSequencePreInits.empty()) { for (const auto &LTPreInits : LoopSequencePreInits) { - if (!LTPreInits.empty()) { + if (!LTPreInits.empty()) llvm::append_range(PreInits, LTPreInits); - } } } @@ -16075,9 +16059,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Order matters: pre-inits may define variables used in the original // inits such as upper bounds... auto TransformPreInit = TransformsPreInits[TransformIndex++]; - if (!TransformPreInit.empty()) { + if (!TransformPreInit.empty()) llvm::append_range(PreInits, TransformPreInit); - } + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], PreInits); } @@ -17496,13 +17480,15 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( if (CountVal.isInvalid()) Count = nullptr; + SmallVector ArgsVec = {First, Count}; + // OpenMP [6.0, Restrictions] // first + count - 1 must not evaluate to a value greater than the // loop sequence length of the associated canonical loop sequence. 
// This check must be performed afterwards due to the delayed // parsing and computation of the associated loop sequence return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, - FirstLoc, CountLoc, EndLoc, First, Count); + FirstLoc, CountLoc, EndLoc, ArgsVec); } OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2a03c6a1fd0e4..ac4883c4f2a18 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4452,9 +4452,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { ContextDirectiveAsFortran()); } -void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) { - context_.Say(GetContext().clauseSource, - "FREE_AGENT clause is not implemented yet"_err_en_US, // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
>From e294777879dd46c1859a03c307e70dd03abe11b7 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Thu, 22 May 2025 10:39:39 +0000 Subject: [PATCH 09/12] Removed unncessary warning and updated tests accordingly --- .../clang/Basic/DiagnosticSemaKinds.td | 3 -- clang/lib/Sema/SemaOpenMP.cpp | 21 +-------- clang/test/OpenMP/fuse_messages.cpp | 43 +++++++++++++++---- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e85cd32d78b5c..2bd0f895204c9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,9 +11612,6 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; -def warn_omp_different_loop_ind_var_types : Warning < - "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">, - InGroup; def err_omp_not_canonical_loop : Error < "loop after '#pragma omp %0' is not in canonical form">; def err_omp_not_a_loop_sequence : Error < diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 9819dcfe60360..5f36d968c68fa 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14360,31 +14360,12 @@ bool SemaOpenMP::analyzeLoopSequence( OpenMPDirectiveKind Kind) { VarsWithInheritedDSAType TmpDSA; - QualType BaseInductionVarType; /// Helper Lambda to handle storing initialization and body statements for - /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch - /// between induction variables types + /// both ForStmt and CXXForRangeStmt auto StoreLoopStatements = [&](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { OriginalInits.back().push_back(For->getInit()); ForStmts.push_back(For); - // Extract induction variable - if (auto 
*InitStmt = dyn_cast_or_null(For->getInit())) { - if (auto *InitDecl = dyn_cast(InitStmt->getSingleDecl())) { - QualType InductionVarType = InitDecl->getType().getCanonicalType(); - - // Compare with first loop type - if (BaseInductionVarType.isNull()) { - BaseInductionVarType = InductionVarType; - } else if (!Context.hasSameType(BaseInductionVarType, - InductionVarType)) { - Diag(InitDecl->getBeginLoc(), - diag::warn_omp_different_loop_ind_var_types) - << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType - << InductionVarType; - } - } - } } else { auto *CXXFor = cast(LoopStmt); OriginalInits.back().push_back(CXXFor->getBeginStmt()); diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 2a2491d008a0b..4902d424373e5 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -70,15 +70,6 @@ void func() { for(int j = 0; j < 10; ++j); } - //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}} - //expected-warning@+5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}} - #pragma omp fuse - { - for(int i = 0; i < 10; ++i); - for(unsigned int j = 0; j < 10; ++j); - for(long long k = 0; k < 100; ++k); - } - //expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} #pragma omp fuse { @@ -123,6 +114,40 @@ void func() { for(int j = 0; j < 100; ++j); for(int k = 0; k < 50; ++k); } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}} + #pragma omp fuse looprange(1,6) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + // This fusion results in 2 loops + #pragma omp fuse looprange(1,2) + { + for(int i = 0; i < 10; ++i); + 
for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(2,3) + { + #pragma omp unroll partial(2) + for(int i = 0; i < 10; ++i); + + #pragma omp reverse + for(int j = 0; j < 10; ++j); + + #pragma omp fuse + { + { + #pragma omp reverse + for(int j = 0; j < 10; ++j); + } + for(int k = 0; k < 50; ++k); + } + } } // In a template context, but expression itself not instantiation-dependent >From 1c8f0fe23fdd78de1512505f128fa9e8bff655f9 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 20 Jun 2025 14:17:29 +0000 Subject: [PATCH 10/12] Address formatting issues --- clang/include/clang/Parse/Parser.h | 2 +- clang/lib/AST/StmtOpenMP.cpp | 10 +++++----- clang/lib/Parse/ParseOpenMP.cpp | 2 +- flang/lib/Lower/OpenMP/Clauses.cpp | 2 +- flang/lib/Parser/openmp-parsers.cpp | 8 +++----- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 08bee0078b5ff..9364007f3cf41 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6737,7 +6737,7 @@ class Parser : public CodeCompletionHandler { /// Parses the 'looprange' clause of a '#pragma omp fuse' directive. OMPClause *ParseOpenMPLoopRangeClause(); - + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. 
OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index f527e6361b5e5..1f49e9f2a0640 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -522,15 +522,15 @@ OMPFuseDirective *OMPFuseDirective::Create( NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); - // The number of top level canonical nests could + // The number of top level canonical nests could // not match the total number of generated loops // Example: // Before fusion: - // for (int i = 0; i < N; ++i) - // for (int j = 0; j < M; ++j) + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) // A[i][j] = i + j; - // - // for (int k = 0; k < P; ++k) + // + // for (int k = 0; k < P; ++k) // B[k] = k * 2; // Here, NumLoopNests = 2, but NumLoops = 3. Dir->setNumGeneratedLoopNests(NumLoopNests); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 2d6d624c1ecc8..48d9c184131cd 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3520,7 +3520,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, break; case OMPC_looprange: Clause = ParseOpenMPLoopRangeClause(); - break; + break; default: break; } diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index a38249bf2b588..c94d56cb57756 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -998,7 +998,7 @@ Link make(const parser::OmpClause::Link &inp, } LoopRange make(const parser::OmpClause::Looprange &inp, - semantics::SemanticsContext &semaCtx) { + semantics::SemanticsContext &semaCtx) { llvm_unreachable("Unimplemented: looprange"); } diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index d53389746dbec..39978e402e63b 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -847,10 +847,8 @@ TYPE_PARSER( maybe(":"_tok >> 
nonemptyList(Parser{})), /*PostModified=*/pure(true))) -TYPE_PARSER( - construct(scalarIntConstantExpr, - "," >> scalarIntConstantExpr) -) +TYPE_PARSER(construct( + scalarIntConstantExpr, "," >> scalarIntConstantExpr)) // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1026,7 +1024,7 @@ TYPE_PARSER( // "LINK" >> construct(construct( parenthesized(Parser{}))) || "LOOPRANGE" >> construct(construct( - parenthesized(Parser{}))) || + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( >From 009d8630c7ff97dedc543df04d0b18ab4579a503 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 20 Jun 2025 14:44:31 +0000 Subject: [PATCH 11/12] Address minor feedback part 2 --- clang/include/clang/AST/OpenMPClause.h | 8 ++++++-- clang/include/clang/AST/StmtOpenMP.h | 1 + clang/include/clang/Basic/DiagnosticSemaKinds.td | 10 +++++----- clang/lib/Sema/SemaOpenMP.cpp | 15 +++++---------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 3df5133a17fb4..478c41322f34a 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1197,12 +1197,16 @@ class OMPLoopRangeClause final void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } - /// Get looprange arguments: first and count + /// Get looprange 'first' expression Expr *getFirst() const { return getArgs()[0]; } + + /// Get looprange 'count' expression Expr *getCount() const { return getArgs()[1]; } - /// Set looprange arguments: first and count + /// Set looprange 'first' expression void setFirst(Expr *E) { getArgs()[0] = E; } + + /// Set looprange 'count' expression void setCount(Expr *E) { getArgs()[1] = E; } MutableArrayRef getArgs() { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 
0421c06245cac..5ec3677fc7507 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -976,6 +976,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop /// transformation void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2bd0f895204c9..d807b6b076724 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,16 +11612,16 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; -def err_omp_not_canonical_loop : Error < +def err_omp_not_canonical_loop : Error< "loop after '#pragma omp %0' is not in canonical form">; -def err_omp_not_a_loop_sequence : Error < +def err_omp_not_a_loop_sequence : Error< "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; -def err_omp_empty_loop_sequence : Error < +def err_omp_empty_loop_sequence : Error< "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; -def err_omp_invalid_looprange : Error < +def err_omp_invalid_looprange : Error< "loop range in '#pragma omp %0' exceeds the number of available loops: " "range end '%1' is greater than the total number of loops '%2'">; -def warn_omp_redundant_fusion : Warning < +def warn_omp_redundant_fusion : Warning< "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, InGroup; def err_omp_not_for : Error< diff 
--git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5f36d968c68fa..8aa21c5c01220 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,7 +22,6 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -48,7 +47,6 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include -#include using namespace clang; using namespace llvm::omp; @@ -14201,7 +14199,6 @@ template static bool tryHandleAs(T *t, F &&) { return false; } -/// /// Tries to recursively cast `t` to one of the given types and invokes `f` if /// successful. /// @@ -14274,7 +14271,7 @@ bool SemaOpenMP::checkTransformableLoopNest( /// Counts the total number of nested loops, including the outermost loop (the /// original loop). PRECONDITION of this visitor is that it must be invoked from -/// the original loop to be analyzed. The traversal is stop for Decl's and +/// the original loop to be analyzed. The traversal stops for Decl's and /// Expr's given that they may contain inner loops that must not be counted. 
/// /// Example AST structure for the code: @@ -15945,7 +15942,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Select the type with the largest bit width among all induction variables QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); - for (unsigned int I = FirstVal; I < LastVal; ++I) { + for (unsigned I = FirstVal; I < LastVal; ++I) { QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { IVType = CurrentIVType; @@ -16054,9 +16051,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, auto [IVVD, IVDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); - if (!LBVD || !STVD || !NIVD || !IVVD) - assert(LBVD && STVD && NIVD && IVVD && - "OpenMP Fuse Helper variables creation failed"); + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); UBVarDecls.push_back(UBVD); LBVarDecls.push_back(LBVD); @@ -16097,11 +16093,10 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // original.indexk = ivk // body(k); Expr *InitVal = IntegerLiteral::Create(Context, // llvm::APInt(IVWidth, 0), - // } // 1. 
Create the initialized fuse index - const std::string IndexName = Twine(".omp.fuse.index").str(); + StringRef IndexName = ".omp.fuse.index"; Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), IVType, SourceLocation()); VarDecl *IndexDecl = >From eb464451c30c19745591738d71e3496d4d11514a Mon Sep 17 00:00:00 2001 From: Roger Ferrer Ibanez Date: Mon, 14 Jul 2025 11:34:36 +0000 Subject: [PATCH 12/12] Address some of the feedback --- clang/include/clang/AST/OpenMPClause.h | 46 ++++--------- clang/include/clang/Sema/SemaOpenMP.h | 2 +- clang/lib/AST/OpenMPClause.cpp | 11 ++- clang/lib/Sema/SemaOpenMP.cpp | 92 ++++++++++++-------------- 4 files changed, 63 insertions(+), 88 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 478c41322f34a..5034ff9bacbfc 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1154,11 +1154,8 @@ class OMPFullClause final : public OMPNoChildClause { /// for(int k = 127; k >= 0; --k) /// \endcode class OMPLoopRangeClause final - : public OMPClause, - private llvm::TrailingObjects { + : public OMPClause { friend class OMPClauseReader; - friend class llvm::TrailingObjects; - /// Location of '(' SourceLocation LParenLoc; @@ -1166,24 +1163,25 @@ class OMPLoopRangeClause final SourceLocation FirstLoc, CountLoc; /// Number of looprange arguments (always 2: first, count) - unsigned NumArgs = 2; + static constexpr unsigned NumArgs = 2; + Stmt *Args[NumArgs] = {nullptr, nullptr}; - /// Set the argument expressions. - void setArgs(ArrayRef Args) { - assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments"); - std::copy(Args.begin(), Args.end(), getTrailingObjects()); - } + /// Set looprange 'first' expression + void setFirst(Expr *E) { Args[0] = E; } + + /// Set looprange 'count' expression + void setCount(Expr *E) { Args[1] = E; } /// Build an empty clause for deserialization. 
explicit OMPLoopRangeClause() - : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {} + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} public: /// Build a 'looprange' clause AST node. static OMPLoopRangeClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, - SourceLocation EndLoc, ArrayRef Args); + SourceLocation EndLoc, Expr* First, Expr* Count); /// Build an empty 'looprange' clause node. static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); @@ -1198,32 +1196,16 @@ class OMPLoopRangeClause final void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } /// Get looprange 'first' expression - Expr *getFirst() const { return getArgs()[0]; } + Expr *getFirst() const { return cast_or_null(Args[0]); } /// Get looprange 'count' expression - Expr *getCount() const { return getArgs()[1]; } - - /// Set looprange 'first' expression - void setFirst(Expr *E) { getArgs()[0] = E; } - - /// Set looprange 'count' expression - void setCount(Expr *E) { getArgs()[1] = E; } - - MutableArrayRef getArgs() { - return MutableArrayRef(getTrailingObjects(), NumArgs); - } - ArrayRef getArgs() const { - return ArrayRef(getTrailingObjects(), NumArgs); - } + Expr *getCount() const { return cast_or_null(Args[1]); } child_range children() { - return child_range(reinterpret_cast(getArgs().begin()), - reinterpret_cast(getArgs().end())); + return child_range(Args, Args + NumArgs); } const_child_range children() const { - auto AR = getArgs(); - return const_child_range(reinterpret_cast(AR.begin()), - reinterpret_cast(AR.end())); + return const_child_range(Args, Args + NumArgs); } child_range used_children() { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index f848c4a7d715e..09c97dc2e119b 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1494,7 +1494,7 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl 
&LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); - /// @brief Categories of loops encountered during semantic OpenMP loop + /// Categories of loops encountered during semantic OpenMP loop /// analysis /// /// This enumeration identifies the structural category of a loop or sequence diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index e0570262b2a05..4eb249948932e 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1028,23 +1028,20 @@ OMPLoopRangeClause * OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc, - ArrayRef Args) { - - assert(Args.size() == 2 && - "looprange clause must have exactly two arguments"); + Expr *First, Expr* Count) { OMPLoopRangeClause *Clause = CreateEmpty(C); Clause->setLocStart(StartLoc); Clause->setLParenLoc(LParenLoc); Clause->setFirstLoc(FirstLoc); Clause->setCountLoc(CountLoc); Clause->setLocEnd(EndLoc); - Clause->setArgs(Args); + Clause->setFirst(First); + Clause->setCount(Count); return Clause; } OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { - void *Mem = C.Allocate(totalSizeToAlloc(2)); - return new (Mem) OMPLoopRangeClause(); + return new (C) OMPLoopRangeClause(); } OMPAllocateClause *OMPAllocateClause::Create( diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 8aa21c5c01220..edffb041b3664 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14195,29 +14195,29 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( } /// Overloaded base case function -template static bool tryHandleAs(T *t, F &&) { +template static bool tryHandleAs(T *, F &&) { return false; } -/// Tries to recursively cast `t` to one of the given types and invokes `f` if -/// successful. 
+/// Tries to recursively cast `Type` to one of the given types and invokes +/// `Func` if successful. /// -/// @tparam Class The first type to check. -/// @tparam Rest The remaining types to check. -/// @tparam T The base type of `t`. -/// @tparam F The callable type for the function to invoke upon a successful +/// \tparam Class The first type to check. +/// \tparam Rest The remaining types to check. +/// \tparam T The base type of `Type`. +/// \tparam F The callable type for the function to invoke upon a successful /// cast. -/// @param t The object to be checked. -/// @param f The function to invoke if `t` matches `Class`. -/// @return `true` if `t` matched any type and `f` was called, otherwise +/// \param Type The object to be checked. +/// \param Func The function to invoke if `Type` matches `Class`. +/// \return `true` if `Type` matched any type and `Func` was called, otherwise /// `false`. template -static bool tryHandleAs(T *t, F &&f) { - if (Class *c = dyn_cast(t)) { - f(c); +static bool tryHandleAs(T *Type, F &&Func) { + if (Class *C = dyn_cast(Type)) { + Func(C); return true; } - return tryHandleAs(t, std::forward(f)); + return tryHandleAs(Type, std::forward(Func)); } /// Updates OriginalInits by checking Transform against loop transformation @@ -14297,7 +14297,7 @@ bool SemaOpenMP::checkTransformableLoopNest( /// } /// } /// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops -class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { +class NestedLoopCounterVisitor final : public DynamicRecursiveASTVisitor { private: unsigned NestedLoopCount = 0; @@ -14385,22 +14385,21 @@ bool SemaOpenMP::analyzeLoopSequence( LoopSeqSize += NumGeneratedLoopNests; NumLoops += NumGeneratedLoops; return true; - } else { - // Unroll full (0 loops produced) - Diag(Child->getBeginLoc(), diag::err_omp_not_for) - << 0 << getOpenMPDirectiveName(Kind); - return false; } + // Unroll full (0 loops produced) + Diag(Child->getBeginLoc(), 
diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; } // Handle loop transformations with multiple loop nests // Unroll full - if (NumGeneratedLoopNests <= 0) { + if (!NumGeneratedLoopNests) { Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; } // Loop transformatons such as split or loopranged fuse - else if (NumGeneratedLoopNests > 1) { + if (NumGeneratedLoopNests > 1) { // Get the preinits related to this loop sequence generating // loop transformation (i.e loopranged fuse, split...) LoopSequencePreInits.emplace_back(); @@ -14413,30 +14412,29 @@ bool SemaOpenMP::analyzeLoopSequence( LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind); - } else { - // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) - // Process the transformed loop statement - OriginalInits.emplace_back(); - TransformsPreInits.emplace_back(); - LoopHelpers.emplace_back(); - LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); - - unsigned IsCanonical = - checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, - *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); - - if (!IsCanonical) { - Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) - << getOpenMPDirectiveName(Kind); - return false; - } - StoreLoopStatements(TransformedStmt); - updatePreInits(LoopTransform, TransformsPreInits); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + // Process the transformed loop statement + OriginalInits.emplace_back(); + TransformsPreInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); - NumLoops += NumGeneratedLoops; - ++LoopSeqSize; - return true; + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, + *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + 
Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; } + StoreLoopStatements(TransformedStmt); + updatePreInits(LoopTransform, TransformsPreInits); + + NumLoops += NumGeneratedLoops; + ++LoopSeqSize; + return true; }; /// Modularized code for handling regular canonical loops @@ -16303,7 +16301,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Only TransformSingleLoop requires inserting pre-inits here if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { - auto TransformPreInit = TransformsPreInits[TransformIndex++]; + const auto &TransformPreInit = TransformsPreInits[TransformIndex++]; if (!TransformPreInit.empty()) { llvm::append_range(PreInits, TransformPreInit); } @@ -17456,15 +17454,13 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( if (CountVal.isInvalid()) Count = nullptr; - SmallVector ArgsVec = {First, Count}; - // OpenMP [6.0, Restrictions] // first + count - 1 must not evaluate to a value greater than the // loop sequence length of the associated canonical loop sequence. 
// This check must be performed afterwards due to the delayed // parsing and computation of the associated loop sequence return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, - FirstLoc, CountLoc, EndLoc, ArgsVec); + FirstLoc, CountLoc, EndLoc, First, Count); } OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, From openmp-commits at lists.llvm.org Mon Jul 14 04:37:14 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?B?Um9nZXIgRmVycmVyIEliw6HDsWV6?= via Openmp-commits) Date: Mon, 14 Jul 2025 04:37:14 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6874ebea.050a0220.22ff3f.296d@mx.google.com> rofirrim wrote: Hi all, @eZWALT is changing jobs and kindly asked me if I could finish on his behalf. I plan to go through all the items of feedback and then rebase against main. 
https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Mon Jul 14 06:22:37 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?B?Um9nZXIgRmVycmVyIEliw6HDsWV6?= via Openmp-commits) Date: Mon, 14 Jul 2025 06:22:37 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6875049d.170a0220.1768e9.fc15@mx.google.com> https://github.com/rofirrim updated https://github.com/llvm/llvm-project/pull/139293 >From fb91129401f61b332fc1147e5a81d553abd7658a Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:25:33 +0000 Subject: [PATCH 01/13] Add fuse directive patch --- clang/include/clang-c/Index.h | 4 + clang/include/clang/AST/RecursiveASTVisitor.h | 3 + clang/include/clang/AST/StmtOpenMP.h | 99 +- .../clang/Basic/DiagnosticSemaKinds.td | 8 + clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Sema/SemaOpenMP.h | 27 + .../include/clang/Serialization/ASTBitCodes.h | 1 + clang/lib/AST/StmtOpenMP.cpp | 25 + clang/lib/AST/StmtPrinter.cpp | 5 + clang/lib/AST/StmtProfile.cpp | 4 + clang/lib/Basic/OpenMPKinds.cpp | 2 +- clang/lib/CodeGen/CGStmt.cpp | 3 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 + clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 600 +++++++ clang/lib/Sema/TreeTransform.h | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 11 + clang/lib/Serialization/ASTWriterStmt.cpp | 6 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 1 + clang/test/OpenMP/fuse_ast_print.cpp | 278 +++ clang/test/OpenMP/fuse_codegen.cpp | 1511 +++++++++++++++++ clang/test/OpenMP/fuse_messages.cpp | 76 + clang/tools/libclang/CIndex.cpp | 7 + clang/tools/libclang/CXCursor.cpp | 3 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 4 + .../runtime/test/transform/fuse/foreach.cpp | 192 +++ 
openmp/runtime/test/transform/fuse/intfor.c | 50 + .../runtime/test/transform/fuse/iterfor.cpp | 194 +++ .../fuse/parallel-wsloop-collapse-foreach.cpp | 208 +++ .../fuse/parallel-wsloop-collapse-intfor.c | 45 + 31 files changed, 3387 insertions(+), 2 deletions(-) create mode 100644 clang/test/OpenMP/fuse_ast_print.cpp create mode 100644 clang/test/OpenMP/fuse_codegen.cpp create mode 100644 clang/test/OpenMP/fuse_messages.cpp create mode 100644 openmp/runtime/test/transform/fuse/foreach.cpp create mode 100644 openmp/runtime/test/transform/fuse/intfor.c create mode 100644 openmp/runtime/test/transform/fuse/iterfor.cpp create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp create mode 100644 openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index e4cb4327fbaac..148b89ab9cfa4 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2162,6 +2162,10 @@ enum CXCursorKind { */ CXCursor_OMPStripeDirective = 310, + /** OpenMP fuse directive + */ + CXCursor_OMPFuseDirective = 318, + /** OpenACC Compute Construct. 
*/ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 5cb2f57edffe4..918216e8df4aa 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3090,6 +3090,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective, DEF_TRAVERSE_STMT(OMPReverseDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPFuseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPInterchangeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index e2fd2114026f7..cb8bb91f4768c 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Number of loops generated by this loop transformation. unsigned NumGeneratedLoops = 0; + /// Number of top level canonical loop nests generated by this loop + /// transformation + unsigned NumGeneratedLoopNests = 0; protected: explicit OMPLoopTransformationDirective(StmtClass SC, @@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop + /// transformation + void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } public: /// Return the number of associated (consumed) loops. @@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Return the number of loops generated by this loop transformation. 
unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; } + /// Return the number of top level canonical loop nests generated by this loop + /// transformation + unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; } + /// Get the de-sugared statements after the loop transformation. /// /// Might be nullptr if either the directive generates no loops and is handled @@ -995,7 +1005,8 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass; + C == OMPStripeDirectiveClass || + C == OMPFuseDirectiveClass; } }; @@ -5562,6 +5573,7 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5793,6 +5805,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_reverse, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5865,6 +5878,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5915,6 +5929,89 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp fuse' loop transformation directive +/// +/// \code{c} +/// #pragma omp fuse +/// { +/// for(int i = 0; i < m1; ++i) {...} +/// for(int j = 0; j < m2; ++j) {...} +/// ... 
+/// } +/// \endcode + +class OMPFuseDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + // Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPLoopTransformationDirective(OMPFuseDirectiveClass, + llvm::omp::OMPD_fuse, StartLoc, EndLoc, + NumLoops) { + // This default initialization assumes simple loop fusion. + // If a 'looprange' clause is specified, these values must be explicitly set + setNumGeneratedLoopNests(1); + setNumGeneratedLoops(NumLoops); + } + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for #pragma omp fuse' + /// + /// \param C Context of the AST + /// \param StartLoc Location of the introducer (e.g the 'omp' token) + /// \param EndLoc Location of the directive's end (e.g the tok::eod) + /// \param Clauses The directive's clauses + /// \param NumLoops Number of total affected loops + /// \param NumLoopNests Number of affected top level canonical loops + /// (number of items in the 'looprange' clause if present) + /// \param AssociatedStmt The outermost associated loop + /// \param TransformedStmt The loop nest after fusion, or nullptr in + /// dependent + /// \param PreInits Helper preinits statements for the loop nest + static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, + unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, + Stmt *PreInits); + + /// Build an empty '#pragma omp fuse' AST node for deserialization + /// + /// \param C Context of the AST + /// \param NumClauses Number of clauses to allocate + /// \param NumLoops Number of 
associated loops to allocate + static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, + unsigned NumLoops); + + /// Gets the associated loops after the transformation. This is the de-sugared + /// replacement or nullptr in dependent contexts. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPFuseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 979ff60b73b75..fe9ca29038a1f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,6 +11612,14 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; +def warn_omp_different_loop_ind_var_types : Warning < + "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">; +def err_omp_not_canonical_loop : Error < + "loop after '#pragma omp %0' is not in canonical form">; +def err_omp_not_a_loop_sequence : Error < + "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; +def err_omp_empty_loop_sequence : Error < + "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 
c9c173f5c7469..45d1a813e4b1f 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -233,6 +233,7 @@ def OMPStripeDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; def OMPInterchangeDirective : StmtNode; +def OMPFuseDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 7b169f56b6807..ea21377a8db9c 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase { Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + + /// Called on well-formed '#pragma omp fuse' after parsing of its + /// clauses and the associated statement. + StmtResult ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult @@ -1481,6 +1488,26 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); + /// Analyzes and checks a loop sequence for use by a loop transformation + /// + /// \param Kind The loop transformation directive kind. + /// \param NumLoops [out] Number of total canonical loops + /// \param LoopSeqSize [out] Number of top level canonical loops + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param LoopStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop. 
+ /// \param Context + /// \return Whether there was an absence of errors or not + bool checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + ASTContext &Context); + /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. struct OMPDeclareVariantScope { diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 9d265f27b8e31..83b73554d693c 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1948,6 +1948,7 @@ enum StmtCode { STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, + STMT_OMP_FUSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 2eeb5e45ab511..276e43ec9f7d5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -456,6 +456,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, auto *Dir = createDirective( C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); + // The number of generated loops and loop nests during unroll matches + Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); return Dir; @@ -508,6 +510,29 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPFuseDirective *OMPFuseDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { + + OMPFuseDirective *Dir 
= createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + Dir->setNumGeneratedLoopNests(NumLoopNests); + Dir->setNumGeneratedLoops(NumLoops); + return Dir; +} + +OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops) { + return createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 28317911d825b..4f57c63154da0 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -790,6 +790,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { + Indent() << "#pragma omp fuse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c61450e19f1b6..c5d1d5b48508e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1026,6 +1026,10 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index a451fc7c01841..d172450512f13 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ 
b/clang/lib/Basic/OpenMPKinds.cpp @@ -702,7 +702,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe; + DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 8742f8e0fc04a..aa12d62d1b865 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -234,6 +234,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast(*S)); break; + case Stmt::OMPFuseDirectiveClass: + EmitOMPFuseDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d9195d749e056..cf03d5d3d88a3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -198,6 +198,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } else if (const auto *Interchange = dyn_cast(&S)) { PreInits = Interchange->getPreInits(); + } else if (const auto *Fuse = dyn_cast(&S)) { + PreInits = Fuse->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2922,6 +2924,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective( EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) { + // Emit the de-sugared statement + OMPTransformDirectiveScopeRAII FuseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git 
a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a5ab9df01dba9..fe753e5b688b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3855,6 +3855,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); + void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPScopeDirective(const OMPScopeDirective &S); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 0a6cea8869c14..3eb59156c04af 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1493,6 +1493,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 00f4658180807..84ac9587bd54d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4404,6 +4404,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: + case OMPD_fuse: case OMPD_assume: break; default: @@ -6221,6 +6222,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_fuse: + Res = + ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, 
VarsWithInheritedDSA); @@ -14230,6 +14235,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); @@ -14240,6 +14247,265 @@ bool SemaOpenMP::checkTransformableLoopNest( return Result; } +class NestedLoopCounterVisitor + : public clang::RecursiveASTVisitor { +public: + explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {} + + bool VisitForStmt(clang::ForStmt *FS) { + ++NestedLoopCount; + return true; + } + + bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) { + ++NestedLoopCount; + return true; + } + + unsigned getNestedLoopCount() const { return NestedLoopCount; } + +private: + unsigned NestedLoopCount; +}; + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + ASTContext &Context) { + + // Checks whether the given statement is a compound statement + VarsWithInheritedDSAType TmpDSA; + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + // Callback for updating pre-inits in case there are even more + // loop-sequence-generating-constructs inside of the main compound stmt + auto OnTransformationCallback = + [&OriginalInits](OMPLoopBasedDirective *Transform) { + Stmt *DependentPreInits; + if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else if (auto *Dir 
= dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); + else + llvm_unreachable("Unhandled loop transformation"); + + appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + }; + + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. handleLoopSequence serves as the recursive entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure + + auto NLCV = NestedLoopCounterVisitor(); + // Helper functions to validate canonical loop sequence grammar is valid + auto isLoopSequenceDerivation = [](auto *Child) { + return isa(Child) || isa(Child) || + isa(Child); + }; + auto isLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + + // Helper Lambda to handle storing initialization and body statements for both + // ForStmt and CXXForRangeStmt and checks for any possible mismatch between + // induction variables types + QualType BaseInductionVarType; + auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, + this, &Context](Stmt *LoopStmt) { + if (auto *For = dyn_cast(LoopStmt)) { + OriginalInits.back().push_back(For->getInit()); + ForStmts.push_back(For); + // Extract induction variable + if (auto *InitStmt = dyn_cast_or_null(For->getInit())) { + if (auto *InitDecl = dyn_cast(InitStmt->getSingleDecl())) { + QualType InductionVarType = InitDecl->getType().getCanonicalType(); + + // Compare with first loop type + if 
(BaseInductionVarType.isNull()) { + BaseInductionVarType = InductionVarType; + } else if (!Context.hasSameType(BaseInductionVarType, + InductionVarType)) { + Diag(InitDecl->getBeginLoc(), + diag::warn_omp_different_loop_ind_var_types) + << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType + << InductionVarType; + } + } + } + + } else { + assert(isa(LoopStmt) && + "Expected canonical for or range-based for loops."); + auto *CXXFor = dyn_cast(LoopStmt); + OriginalInits.back().push_back(CXXFor->getBeginStmt()); + ForStmts.push_back(CXXFor); + } + }; + // Helper lambda functions to encapsulate the processing of different + // derivations of the canonical loop sequence grammar + // + // Modularized code for handling loop generation and transformations + auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers, + &OriginalInits, &LoopSeqSize, &NumLoops, Kind, + &TmpDSA, &OnTransformationCallback, + this](Stmt *Child) { + auto LoopTransform = dyn_cast(Child); + Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); + unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); + + // Handle the case where transformed statement is not available due to + // dependent contexts + if (!TransformedStmt) { + if (NumGeneratedLoopNests > 0) + return true; + // Unroll full + else { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + // Handle loop transformations with multiple loop nests + // Unroll full + if (NumGeneratedLoopNests <= 0) { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + // Future loop transformations that generate multiple canonical loops + } else if (NumGeneratedLoopNests > 1) { + llvm_unreachable("Multiple canonical loop generating transformations " + "like loop splitting are not yet supported"); + } + + // Process the transformed loop statement + Child = TransformedStmt; + 
OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + OnTransformationCallback(LoopTransform); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(TransformedStmt); + NumLoops += LoopTransform->getNumGeneratedLoops(); + return true; + }; + + // Modularized code for handling regular canonical loops + auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV, + this](Stmt *Child) { + OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(Child); + NumLoops += NLCV.TraverseStmt(Child); + return true; + }; + + // Helper function to process a Loop Sequence Recursively + auto handleLoopSequence = [&](Stmt *LoopSeqStmt, + auto &handleLoopSequenceCallback) -> bool { + for (auto *Child : LoopSeqStmt->children()) { + if (!Child) + continue; + + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recursive traversal of the loop sequence is required + if (isa(Child)) { + if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback)) + return false; + // Already been treated, skip this child + continue; + } + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { + if (isLoopGeneratingStmt(Child)) { + if 
(!handleLoopGeneration(Child)) { + return false; + } + } else { + if (!handleRegularLoop(Child)) { + return false; + } + } + ++LoopSeqSize; + } else { + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + return true; + }; + + // Recursive entry point to process the main loop sequence + if (!handleLoopSequence(AStmt, handleLoopSequence)) { + return false; + } + + if (LoopSeqSize <= 0) { + Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + return true; +} + /// Add preinit statements that need to be propageted from the selected loop. static void addLoopPreInits(ASTContext &Context, OMPLoopBasedDirective::HelperExprs &LoopHelper, @@ -15499,6 +15765,340 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + DeclContext *CurrContext = SemaRef.CurContext; + Scope *CurScope = SemaRef.getCurScope(); + CaptureVars CopyTransformer(SemaRef); + + // Ensure the structured block is not empty + if (!AStmt) { + return StmtError(); + } + // Validate that the potential loop sequence is transformable for fusion + // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops + SmallVector LoopHelpers; + SmallVector LoopStmts; + SmallVector> OriginalInits; + + unsigned NumLoops; + // TODO: Support looprange clause using LoopSeqSize + unsigned LoopSeqSize; + if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, + LoopHelpers, LoopStmts, OriginalInits, + Context)) { + return StmtError(); + } + + // Defer transformation in dependent contexts + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, 
EndLoc, Clauses, + NumLoops, 1, AStmt, nullptr, nullptr); + } + assert(LoopHelpers.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + assert(OriginalInits.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. + SmallVector PreInits; + + // Select the type with the largest bit width among all induction variables + QualType IVType = LoopHelpers[0].IterationVarRef->getType(); + for (unsigned int I = 1; I < LoopSeqSize; ++I) { + QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); + if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { + IVType = CurrentIVType; + } + } + uint64_t IVBitWidth = Context.getIntWidth(IVType); + + // Create pre-init declarations for all loops lower bounds, upper bounds, + // strides and num-iterations + SmallVector LBVarDecls; + SmallVector STVarDecls; + SmallVector NIVarDecls; + SmallVector UBVarDecls; + SmallVector IVVarDecls; + + // Helper lambda to create variables for bounds, strides, and other + // expressions. Generates both the variable declaration and the corresponding + // initialization statement. 
+ auto CreateHelperVarAndStmt = + [&SemaRef = this->SemaRef, &Context, &CopyTransformer, + &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I, + bool NeedsNewVD = false) { + Expr *TransformedExpr = + AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); + if (!TransformedExpr) + return std::pair(nullptr, StmtError()); + + auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str(); + + VarDecl *VD; + if (NeedsNewVD) { + VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name); + SemaRef.AddInitializerToDecl(VD, TransformedExpr, false); + + } else { + // Create a unique variable name + DeclRefExpr *DRE = cast(TransformedExpr); + VD = cast(DRE->getDecl()); + VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name)); + } + // Create the corresponding declaration statement + StmtResult DeclStmt = new (Context) class DeclStmt( + DeclGroupRef(VD), SourceLocation(), SourceLocation()); + return std::make_pair(VD, DeclStmt); + }; + + // Process each single loop to generate and collect declarations + // and statements for all helper expressions + for (unsigned int I = 0; I < LoopSeqSize; ++I) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I); + auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I); + auto [NIVD, NIDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true); + auto [IVVD, IVDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I); + + if (!LBVD || !STVD || !NIVD || !IVVD) + return StmtError(); + + UBVarDecls.push_back(UBVD); + LBVarDecls.push_back(LBVD); + STVarDecls.push_back(STVD); + NIVarDecls.push_back(NIVD); + IVVarDecls.push_back(IVVD); + + PreInits.push_back(UBDStmt.get()); + PreInits.push_back(LBDStmt.get()); + PreInits.push_back(STDStmt.get()); + 
PreInits.push_back(NIDStmt.get()); + PreInits.push_back(IVDStmt.get()); + } + + auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) { + return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(), + false); + }; + + // Following up the creation of the final fused loop will be performed + // which has the following shape (considering the selected loops): + // + // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) { + // if (fuse.index < ni0){ + // iv0 = lb0 + st0 * fuse.index; + // original.index0 = iv0 + // body(0); + // } + // if (fuse.index < ni1){ + // iv1 = lb1 + st1 * fuse.index; + // original.index1 = iv1 + // body(1); + // } + // + // ... + // + // if (fuse.index < nik){ + // ivk = lbk + stk * fuse.index; + // original.indexk = ivk + // body(k); Expr *InitVal = IntegerLiteral::Create(Context, + // llvm::APInt(IVWidth, 0), + + // } + + // 1. Create the initialized fuse index + const std::string IndexName = Twine(".omp.fuse.index").str(); + Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), + IVType, SourceLocation()); + VarDecl *IndexDecl = + buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr); + SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation()); + + if (!InitStmt.isUsable()) + return StmtError(); + + auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType, + Loc = InitVal->getExprLoc()]() { + return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false); + }; + + // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2, + // ..., NI_k) + // + // This loop accumulates the maximum value across multiple expressions, + // ensuring each step constructs a unique AST node for correctness. 
By using + // intermediate temporary variables and conditional operators, we maintain + // distinct nodes and avoid duplicating subtrees. For instance, max(a,b,c): + // omp.temp0 = max(a, b) + // omp.temp1 = max(omp.temp0, c) + // omp.fuse.max = max(omp.temp1, omp.temp0) + + ExprResult MaxExpr; + for (unsigned I = 0; I < LoopSeqSize; ++I) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]); + QualType NITy = NIRef->getType(); + + if (MaxExpr.isUnset()) { + // Initialize MaxExpr with the first NI expression + MaxExpr = NIRef; + } else { + // Create a new accumulator variable t_i = MaxExpr + std::string TempName = (Twine(".omp.temp.") + Twine(I)).str(); + VarDecl *TempDecl = + buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); + TempDecl->setInit(MaxExpr.get()); + DeclRefExpr *TempRef = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + DeclRefExpr *TempRef2 = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + // Add a DeclStmt to PreInits to ensure the variable is declared. + StmtResult TempStmt = new (Context) + DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation()); + + if (!TempStmt.isUsable()) + return StmtError(); + PreInits.push_back(TempStmt.get()); + + // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef) + ExprResult Comparison = + SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef); + // Handle any errors in Comparison creation + if (!Comparison.isUsable()) + return StmtError(); + + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]); + // Update MaxExpr using a conditional expression to hold the max value + MaxExpr = new (Context) ConditionalOperator( + Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), + NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary); + + if (!MaxExpr.isUsable()) + return StmtError(); + } + } + if (!MaxExpr.isUsable()) + return StmtError(); + + // 3. 
Declare the max variable + const std::string MaxName = Twine(".omp.fuse.max").str(); + VarDecl *MaxDecl = + buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr); + MaxDecl->setInit(MaxExpr.get()); + DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false); + StmtResult MaxStmt = new (Context) + DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation()); + + if (MaxStmt.isInvalid()) + return StmtError(); + PreInits.push_back(MaxStmt.get()); + + // 4. Create condition Expr: index < n_max + ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, + MakeIVRef(), MaxRef); + if (!CondExpr.isUsable()) + return StmtError(); + // 5. Increment Expr: ++index + ExprResult IncrExpr = + SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + // 6. Build the Fused Loop Body + // The final fused loop iterates over the maximum logical range. Inside the + // loop, each original loop's index is calculated dynamically, and its body + // is executed conditionally. 
+ // + // Each sub-loop's body is guarded by a conditional statement to ensure + // it executes only within its logical iteration range: + // + // if (fuse.index < ni_k){ + // iv_k = lb_k + st_k * fuse.index; + // original.index = iv_k + // body(k); + // } + + CompoundStmt *FusedBody = nullptr; + SmallVector FusedBodyStmts; + for (unsigned I = 0; I < LoopSeqSize; ++I) { + + // Assignment of the original sub-loop index to compute the logical index + // IV_k = LB_k + omp.fuse.index * ST_k + + ExprResult IdxExpr = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, + MakeVarDeclRef(STVarDecls[I]), MakeIVRef()); + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, + MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get()); + + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, + MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get()); + if (!IdxExpr.isUsable()) + return StmtError(); + + // Update the original i_k = IV_k + SmallVector BodyStmts; + BodyStmts.push_back(IdxExpr.get()); + llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + + if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) + BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); + + Stmt *Body = (isa(LoopStmts[I])) + ? 
cast(LoopStmts[I])->getBody() + : cast(LoopStmts[I])->getBody(); + + BodyStmts.push_back(Body); + + CompoundStmt *CombinedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + ExprResult Condition = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), + MakeVarDeclRef(NIVarDecls[I])); + + if (!Condition.isUsable()) + return StmtError(); + + IfStmt *IfStatement = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr, + Condition.get(), SourceLocation(), SourceLocation(), CombinedBody, + SourceLocation(), nullptr); + + FusedBodyStmts.push_back(IfStatement); + } + FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + + // 7. Construct the final fused loop + ForStmt *FusedForStmt = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(), + FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), + IncrExpr.get()->getEndLoc()); + + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + 1, AStmt, FusedForStmt, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 3e33fb73e01b4..45f556f22c511 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9675,6 +9675,17 @@ StmtResult TreeTransform::TransformOMPInterchangeDirective( return Res; } +template +StmtResult +TreeTransform::TransformOMPFuseDirective(OMPFuseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template 
StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 44cfb83ad2db4..291bd8ea4bf18 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2445,6 +2445,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); D->setNumGeneratedLoops(Record.readUInt32()); + D->setNumGeneratedLoopNests(Record.readUInt32()); } void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2467,6 +2468,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3608,6 +3613,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = OMPReverseDirective::CreateEmpty(Context, NumLoops); break; } + case STMT_OMP_FUSE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops); + break; + } case STMT_OMP_INTERCHANGE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index a6e320c7f3eb0..5bf1ecfb968e8 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2449,6 +2449,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); Record.writeUInt32(D->getNumGeneratedLoops()); + Record.writeUInt32(D->getNumGeneratedLoopNests()); } void 
ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2476,6 +2477,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_FUSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index c77ef26da568d..7218d7e62acdd 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1814,6 +1814,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp new file mode 100644 index 0000000000000..43ce815dab024 --- /dev/null +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -0,0 +1,278 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu 
-fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2() { + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + // DUMP-NEXT: OMPPartialClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 4 + // DUMP-NEXT: IntegerLiteral {{.*}} 4 + #pragma omp unroll partial(4) + // PRINT: #pragma omp fuse + // DUMP-NEXT: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + +} + +//PRINT-LABEL: void foo3( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3 +template +void foo3() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp 
unroll partial(Factor1) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor1) + // PRINT: for (int i = 0; i < 12; i += 1) + // DUMP: ForStmt + for (int i = 0; i < 12; i += 1) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: #pragma omp unroll partial(Factor2) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor2) + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } +} + +// Also test instantiating the template. +void tfoo3() { + foo3<4,2>(); +} + +//PRINT-LABEL: void foo4( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4 +template +void foo4(int start, int end) { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (T i = start; i < end; i += Step) + // DUMP: ForStmt + for (T i = start; i < end; i += Step) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + + // PRINT: for (T j = end; j > start; j -= Step) + // DUMP: ForStmt + for (T j = end; j > start; j -= Step) { + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + + } +} + +// Also test instantiating the template. 
+void tfoo4() { + foo4(0, 64); +} + + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5() { + double arr[128], arr2[128]; + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT-NEXT: for (auto &&a : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&a: arr) + // PRINT: body(a) + // DUMP: CallExpr + body(a); + // PRINT: for (double v = 42; auto &&b : arr) + // DUMP: CXXForRangeStmt + for (double v = 42; auto &&b: arr) + // PRINT: body(b, v); + // DUMP: CallExpr + body(b, v); + // PRINT: for (auto &&c : arr2) + // DUMP: CXXForRangeStmt + for (auto &&c: arr2) + // PRINT: body(c) + // DUMP: CallExpr + body(c); + + } + +} + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionDecl {{.*}} foo6 +void foo6() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i <= 10; ++i) + // DUMP: ForStmt + for (int i = 0; i <= 10; ++i) + body(i); + // PRINT: for (int j = 0; j < 100; ++j) + // DUMP: ForStmt + for(int j = 0; j < 100; ++j) + body(j); + } + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(4) + // PRINT: for (int k = 0; k < 250; ++k) + // DUMP: ForStmt + for (int k = 0; k < 250; ++k) + body(k); + } +} + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 
0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + } + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + } + } + } + } + +} + + + + + +#endif \ No newline at end of file diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp new file mode 100644 index 0000000000000..6c1e21092da43 --- /dev/null +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -0,0 +1,1511 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +//placeholder for loop body code. +extern "C" void body(...) 
{} + +extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { + int i,j; + #pragma omp fuse + { + for(i = start1; i < end1; i += step1) body(i); + for(j = start2; j < end2; j += step2) body(j); + } + +} + +template +void foo2(T start, T end, T step){ + T i,j,k; + #pragma omp fuse + { + for(i = start; i < end; i += step) body(i); + for(j = end; j > start; j -= step) body(j); + for(k = start+step; k < end+step; k += step) body(k); + } +} + +extern "C" void tfoo2() { + foo2(0, 64, 4); +} + +extern "C" void foo3() { + double arr[256]; + #pragma omp fuse + { + #pragma omp fuse + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP16]], label 
%[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP35]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK1: [[IF_THEN22]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END27]] +// CHECK1: [[IF_END27]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo2( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// 
CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] +// 
CHECK1: [[COND_TRUE30]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32:.*]] +// CHECK1: [[COND_FALSE31]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32]] +// CHECK1: [[COND_END32]]: +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] +// CHECK1-NEXT: [[ADD38:%.*]] = add 
i32 [[TMP49]], [[MUL37]] +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK1: [[IF_THEN40]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP61]]) +// CHECK1-NEXT: br label %[[IF_END45]] +// CHECK1: [[IF_END45]]: +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP70]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr 
[[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 +// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 +// CHECK1-NEXT: 
[[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 +// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr 
[[TMP22]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] +// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 +// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 +// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 +// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 +// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 +// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 +// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] +// CHECK1: [[COND_TRUE44]]: +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK1-NEXT: br label %[[COND_END46:.*]] +// CHECK1: [[COND_FALSE45]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: br label %[[COND_END46]] +// CHECK1: [[COND_END46]]: +// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] +// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// 
CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] +// CHECK1: [[COND_TRUE50]]: +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END52:.*]] +// CHECK1: [[COND_FALSE51]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END52]] +// CHECK1: [[COND_END52]]: +// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] +// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 +// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 +// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] +// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] +// 
CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 +// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 +// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 +// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] +// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN64]]: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 +// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] +// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP48]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] +// CHECK1: [[IF_THEN70]]: +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 +// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] +// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END75]] +// CHECK1: [[IF_END75]]: +// CHECK1-NEXT: br label %[[IF_END76]] +// CHECK1: [[IF_END76]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] +// CHECK1: [[IF_THEN78]]: +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] +// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 +// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] +// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) +// CHECK1-NEXT: br label %[[IF_END83]] +// CHECK1: [[IF_END83]]: +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] +// CHECK1: [[IF_THEN85]]: +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] +// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] +// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 +// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] +// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) +// CHECK1-NEXT: br label %[[IF_END90]] +// CHECK1: [[IF_END90]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 
[[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], 
[[TMP28]] +// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP35]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK2: [[IF_THEN22]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END27]] +// CHECK2: [[IF_END27]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr 
[[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 +// 
CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 +// CHECK2-NEXT: 
[[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 +// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 +// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 +// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr 
[[TMP22]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] +// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 +// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 +// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 +// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 +// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 +// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 +// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 +// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] +// CHECK2: [[COND_TRUE44]]: +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 +// CHECK2-NEXT: br label %[[COND_END46:.*]] +// CHECK2: [[COND_FALSE45]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: br label %[[COND_END46]] +// CHECK2: [[COND_END46]]: +// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] +// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// 
CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] +// CHECK2: [[COND_TRUE50]]: +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END52:.*]] +// CHECK2: [[COND_FALSE51]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END52]] +// CHECK2: [[COND_END52]]: +// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] +// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 +// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 +// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 +// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] +// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] +// 
CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 +// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 +// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 +// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] +// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN64]]: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 +// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] +// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP48]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] +// CHECK2: [[IF_THEN70]]: +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 +// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] +// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END75]] +// CHECK2: [[IF_END75]]: +// CHECK2-NEXT: br label %[[IF_END76]] +// CHECK2: [[IF_END76]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] +// CHECK2: [[IF_THEN78]]: +// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] +// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 +// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] +// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) +// CHECK2-NEXT: br label %[[IF_END83]] +// CHECK2: [[IF_END83]]: +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] +// CHECK2: [[IF_THEN85]]: +// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] +// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] +// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 +// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] +// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 +// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) +// CHECK2-NEXT: br label %[[IF_END90]] +// CHECK2: [[IF_END90]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo2( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] +// CHECK2-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], 
label %[[COND_FALSE31:.*]] +// CHECK2: [[COND_TRUE30]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32:.*]] +// CHECK2: [[COND_FALSE31]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32]] +// CHECK2: [[COND_END32]]: +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] +// 
CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK2: [[IF_THEN40]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]]) +// CHECK2-NEXT: br label %[[IF_END45]] +// CHECK2: [[IF_END45]]: +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +//. 
+// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +//. +// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +//. diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp new file mode 100644 index 0000000000000..50dedfd2c0dc6 --- /dev/null +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error at +2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + ; + + // expected-error at +2 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + {int bar = 0;} + + // expected-error at +4 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + int x = 2; + } + + // expected-error at +2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error at +2 {{expected statement}} + #pragma omp fuse + } + + // expected-warning at +1 {{extra tokens at the end of '#pragma omp fuse' are ignored}} + #pragma omp fuse foo + { + for (int i = 0; i < 7; ++i) + ; + } + + + // expected-error at +1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}} + #pragma omp fuse final(0) + { + for (int i = 0; i < 7; ++i) + ; + } + + //expected-error at +4 {{loop after '#pragma omp fuse' is not in canonical form}} + 
//expected-error at +3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} + #pragma omp fuse + { + for(int i = 0; i < 10; i*=2) { + ; + } + } + + //expected-error at +2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} + #pragma omp fuse + {} + + //expected-error at +3 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + #pragma omp unroll full + for(int i = 0; i < 10; ++i); + + for(int j = 0; j < 10; ++j); + } + + //expected-warning at +5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}} + //expected-warning at +5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + for(unsigned int j = 0; j < 10; ++j); + for(long long k = 0; k < 100; ++k); + } +} \ No newline at end of file diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3068621d9c004..3afa59b2f2d6c 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2211,6 +2211,7 @@ class EnqueueVisitor : public ConstStmtVisitor, void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); + void VisitOMPFuseDirective(const OMPFuseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -3369,6 +3370,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const 
OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6323,6 +6328,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPInterchangeDirective: return cxstring::createRef("OMPInterchangeDirective"); + case CXCursor_OMPFuseDirective: + return cxstring::createRef("OMPFuseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index a6301daa672c3..a6d032fa302b1 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -687,6 +687,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPInterchangeDirectiveClass: K = CXCursor_OMPInterchangeDirective; break; + case Stmt::OMPFuseDirectiveClass: + K = CXCursor_OMPFuseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index a87111cb5a11d..6352be8069e9e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -855,6 +855,10 @@ def OMP_For : Directive<[Spelling<"for">]> { let category = CA_Executable; let languages = [L_C]; } +def OMP_Fuse : Directive<[Spelling<"fuse">]> { + let association = AS_Loop; + let category = CA_Executable; +} def OMP_Interchange : Directive<[Spelling<"interchange">]> { let allowedOnceClauses = [ VersionedClause, diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp new file mode 100644 index 0000000000000..cabf4bf8a511d --- /dev/null +++ b/openmp/runtime/test/transform/fuse/foreach.cpp @@ -0,0 +1,192 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + 
const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + 
owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (Reporter a{"C"}; auto &&v : Reporter("A")) + printf("v=%d\n", v); + for (Reporter aa{"D"}; auto &&vv : Reporter("B")) + printf("vv=%d\n", vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [A] iterator dtor +// 
CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done + + +#endif diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c new file mode 100644 index 0000000000000..b8171b4df7042 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/intfor.c @@ -0,0 +1,50 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (int i = 5; i <= 25; i += 5) + printf("i=%d\n", i); + for (int j = 10; j < 100; j += 10) + printf("j=%d\n", j); + for (int k = 10; k > 0; --k) + printf("k=%d\n", k); + } + printf("done\n"); + return EXIT_SUCCESS; +} +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=5 +// CHECK-NEXT: j=10 +// CHECK-NEXT: k=10 +// CHECK-NEXT: i=10 +// CHECK-NEXT: j=20 +// CHECK-NEXT: k=9 +// CHECK-NEXT: i=15 +// CHECK-NEXT: j=30 +// CHECK-NEXT: k=8 +// CHECK-NEXT: i=20 +// CHECK-NEXT: j=40 +// CHECK-NEXT: k=7 +// CHECK-NEXT: i=25 +// CHECK-NEXT: j=50 +// CHECK-NEXT: k=6 +// CHECK-NEXT: j=60 +// CHECK-NEXT: k=5 +// CHECK-NEXT: j=70 +// CHECK-NEXT: k=4 +// CHECK-NEXT: j=80 +// CHECK-NEXT: k=3 +// CHECK-NEXT: j=90 +// CHECK-NEXT: k=2 +// CHECK-NEXT: k=1 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp new file mode 100644 index 0000000000000..552484b2981c4 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/iterfor.cpp @@ -0,0 +1,194 @@ +// RUN: %libomp-cxx20-compile-and-run | 
FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + 
owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter C("C"); + Reporter D("D"); +#pragma omp fuse + { + for (auto it = C.begin(); it != C.end(); ++it) + printf("v=%d\n", *it); + + for (auto it = D.begin(); it != D.end(); ++it) + printf("vv=%d\n", *it); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: [C] ctor +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] end() +// CHECK-NEXT: [C] iterator distance: 3 +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] end() +// CHECK-NEXT: [D] iterator distance: 3 +// CHECK-NEXT: [C] iterator advance: 0 += 0 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 0 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 1 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 1 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator 
deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 2 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 2 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [C] dtor diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..e9f76713fe3e0 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,208 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + 
Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + printf("i=%d v=%d\n", i, v); + for (int vv = 0; vv < 3; ++vv) + printf("i=%d vv=%d\n", i, vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 
+// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done + diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c new file mode 100644 index 0000000000000..272908e72c429 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c @@ -0,0 +1,45 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (int j = 0; j < 3; ++j) + printf("i=%d j=%d\n", i, j); + for (int k = 0; k < 3; ++k) + printf("i=%d k=%d\n", i, k); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: i=0 j=0 +// CHECK-NEXT: i=0 k=0 +// CHECK-NEXT: i=0 j=1 +// CHECK-NEXT: i=0 k=1 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: i=0 k=2 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: i=1 k=0 +// CHECK-NEXT: i=1 j=1 +// CHECK-NEXT: i=1 k=1 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: 
i=1 k=2 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: i=2 k=0 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: i=2 k=1 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: i=2 k=2 +// CHECK-NEXT: done >From 34ac92ada84eeca9573d0b005f24d73738f46626 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:28:04 +0000 Subject: [PATCH 02/13] Add looprange clause --- clang/include/clang/AST/OpenMPClause.h | 100 ++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 8 + clang/include/clang/AST/StmtOpenMP.h | 9 +- .../clang/Basic/DiagnosticSemaKinds.td | 5 + clang/include/clang/Parse/Parser.h | 3 + clang/include/clang/Sema/SemaOpenMP.h | 6 + clang/lib/AST/OpenMPClause.cpp | 35 ++ clang/lib/AST/StmtOpenMP.cpp | 7 +- clang/lib/AST/StmtProfile.cpp | 7 + clang/lib/Basic/OpenMPKinds.cpp | 2 + clang/lib/Parse/ParseOpenMP.cpp | 36 ++ clang/lib/Sema/SemaOpenMP.cpp | 155 +++++++-- clang/lib/Sema/TreeTransform.h | 33 ++ clang/lib/Serialization/ASTReader.cpp | 11 + clang/lib/Serialization/ASTReaderStmt.cpp | 4 +- clang/lib/Serialization/ASTWriter.cpp | 8 + clang/test/OpenMP/fuse_ast_print.cpp | 67 ++++ clang/test/OpenMP/fuse_codegen.cpp | 320 +++++++++++++++++- clang/test/OpenMP/fuse_messages.cpp | 112 +++++- clang/tools/libclang/CIndex.cpp | 5 + llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 16 +- llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 + 22 files changed, 919 insertions(+), 36 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 6fd16bc0f03be..8f937cdef9cd0 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1143,6 +1143,106 @@ class OMPFullClause final : public OMPNoChildClause { static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) 
+/// \endcode +class OMPLoopRangeClause final : public OMPClause { + friend class OMPClauseReader; + + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of 'first' + SourceLocation FirstLoc; + + /// Location of 'count' + SourceLocation CountLoc; + + /// Expr associated with 'first' argument + Expr *First = nullptr; + + /// Expr associated with 'count' argument + Expr *Count = nullptr; + + /// Set 'first' + void setFirst(Expr *First) { this->First = First; } + + /// Set 'count' + void setCount(Expr *Count) { this->Count = Count; } + + /// Set location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Set location of 'first' argument + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + + /// Set location of 'count' argument + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + +public: + /// Build an AST node for a 'looprange' clause + /// + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param FirstLoc Location of 'first'. + /// \param CountLoc Location of 'count'. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, Expr *First, Expr *Count); + + /// Build an empty 'looprange' node for deserialization + /// + /// \param C Context of the AST. 
+ static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); + + /// Returns the location of '(' + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns the location of 'first' + SourceLocation getFirstLoc() const { return FirstLoc; } + + /// Returns the location of 'count' + SourceLocation getCountLoc() const { return CountLoc; } + + /// Returns the argument 'first' or nullptr if not set + Expr *getFirst() const { return cast_or_null<Expr>(First); } + + /// Returns the argument 'count' or nullptr if not set + Expr *getCount() const { return cast_or_null<Expr>(Count); } + + child_range children() { + return child_range(reinterpret_cast<Stmt **>(&First), + reinterpret_cast<Stmt **>(&Count) + 1); + } + + const_child_range children() const { + auto Children = const_cast<OMPLoopRangeClause *>(this)->children(); + return const_child_range(Children.begin(), Children.end()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_looprange; + } +}; + /// Representation of the 'partial' clause of the '#pragma omp unroll' /// directive. 
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 918216e8df4aa..10e44e69dd5da 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3410,6 +3410,14 @@ bool RecursiveASTVisitor::VisitOMPFullClause(OMPFullClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPLoopRangeClause( + OMPLoopRangeClause *C) { + TRY_TO(TraverseStmt(C->getFirst())); + TRY_TO(TraverseStmt(C->getCount())); + return true; +} + template bool RecursiveASTVisitor::VisitOMPPartialClause(OMPPartialClause *C) { TRY_TO(TraverseStmt(C->getFactor())); diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index cb8bb91f4768c..f5115afd0753e 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5572,7 +5572,9 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { + // Tiling doubles the original number of loops setNumGeneratedLoops(2 * NumLoops); + // Produces a single top-level canonical loop nest setNumGeneratedLoopNests(1); } @@ -5804,6 +5806,7 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, EndLoc, NumLoops) { + // Reverse produces a single top-level canonical loop nest setNumGeneratedLoops(NumLoops); setNumGeneratedLoopNests(1); } @@ -5877,6 +5880,8 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { + // Interchange produces a single top-level canonical loop + // nest, with the exact same amount of total loops setNumGeneratedLoops(NumLoops); setNumGeneratedLoopNests(1); } @@ -5995,8 
+6000,10 @@ class OMPFuseDirective final : public OMPLoopTransformationDirective { /// \param C Context of the AST /// \param NumClauses Number of clauses to allocate /// \param NumLoops Number of associated loops to allocate + /// \param NumLoopNests Number of top level loops to allocate static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, - unsigned NumLoops); + unsigned NumLoops, + unsigned NumLoopNests); /// Gets the associated loops after the transformation. This is the de-sugared /// replacement or nulltpr in dependent contexts. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fe9ca29038a1f..002aa7a774fbe 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11620,6 +11620,11 @@ def err_omp_not_a_loop_sequence : Error < "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; def err_omp_empty_loop_sequence : Error < "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; +def err_omp_invalid_looprange : Error < + "loop range in '#pragma omp %0' exceeds the number of available loops: " + "range end '%1' is greater than the total number of loops '%2'">; +def warn_omp_redundant_fusion : Warning < + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index a47e23ffbd357..08bee0078b5ff 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6735,6 +6735,9 @@ class Parser : public CodeCompletionHandler { OpenMPClauseKind Kind, bool ParseOnly); + /// Parses the 
'looprange' clause of a '#pragma omp fuse' directive. + OMPClause *ParseOpenMPLoopRangeClause(); + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index ea21377a8db9c..0c28aaf6ab21a 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -922,6 +922,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on well-formed 'looprange' clause after parsing its arguments. + OMPClause * + ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc); /// Called on well-formed 'ordered' clause. OMPClause * ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 0e5052b944162..0b5808eb100e4 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1024,6 +1024,26 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { return new (C) OMPPartialClause(); } +OMPLoopRangeClause * +OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + Expr *First, Expr *Count) { + OMPLoopRangeClause *Clause = CreateEmpty(C); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setLocEnd(EndLoc); + Clause->setFirstLoc(FirstLoc); + Clause->setCountLoc(CountLoc); + Clause->setFirst(First); + Clause->setCount(Count); + return Clause; +} + +OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPLoopRangeClause(); +} + OMPAllocateClause *OMPAllocateClause::Create( const ASTContext &C, SourceLocation 
StartLoc, SourceLocation LParenLoc, Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc, @@ -1888,6 +1908,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) { } } +void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) { + OS << "looprange"; + + Expr *First = Node->getFirst(); + Expr *Count = Node->getCount(); + + if (First && Count) { + OS << "("; + First->printPretty(OS, nullptr, Policy, 0); + OS << ","; + Count->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 276e43ec9f7d5..c5a6732cc2217 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -527,10 +527,13 @@ OMPFuseDirective *OMPFuseDirective::Create( OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, - unsigned NumLoops) { - return createEmptyDirective( + unsigned NumLoops, + unsigned NumLoopNests) { + OMPFuseDirective *Dir = createEmptyDirective( C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops); + Dir->setNumGeneratedLoopNests(NumLoopNests); + return Dir; } OMPForSimdDirective * diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c5d1d5b48508e..34ed3f22f6eb7 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) { Profiler->VisitExpr(Factor); } +void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + if (const Expr *First = C->getFirst()) + Profiler->VisitExpr(First); + if (const Expr *Count = C->getCount()) + Profiler->VisitExpr(Count); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if 
(C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index d172450512f13..18330181f1509 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 78d3503d8eb68..2d6d624c1ecc8 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3057,6 +3057,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPLoopRangeClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SourceLocation FirstLoc, CountLoc; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + FirstLoc = Tok.getLocation(); + ExprResult FirstVal = ParseConstantExpression(); + if (!FirstVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ExpectAndConsume(tok::comma); + + CountLoc = Tok.getLocation(); + ExprResult CountVal = ParseConstantExpression(); + if (!CountVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + T.consumeClose(); + + return Actions.OpenMP().ActOnOpenMPLoopRangeClause( + FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(), + FirstLoc, CountLoc, T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPPermutationClause() { SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; SmallVector ArgExprs; @@ -3485,6 +3518,9 @@ OMPClause 
*Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_looprange: + Clause = ParseOpenMPLoopRangeClause(); + break; default: break; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 84ac9587bd54d..3ec3f2ad31e78 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14326,7 +14326,6 @@ bool SemaOpenMP::checkTransformableLoopSequence( // and tries to match the input AST to the canonical loop sequence grammar // structure - auto NLCV = NestedLoopCounterVisitor(); // Helper functions to validate canonical loop sequence grammar is valid auto isLoopSequenceDerivation = [](auto *Child) { return isa(Child) || isa(Child) || @@ -14429,7 +14428,7 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Modularized code for handling regular canonical loops auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, &NumLoops, Kind, &TmpDSA, &NLCV, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, this](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); @@ -14442,8 +14441,11 @@ bool SemaOpenMP::checkTransformableLoopSequence( << getOpenMPDirectiveName(Kind); return false; } + storeLoopStatements(Child); - NumLoops += NLCV.TraverseStmt(Child); + auto NLCV = NestedLoopCounterVisitor(); + NLCV.TraverseStmt(Child); + NumLoops += NLCV.getNestedLoopCount(); return true; }; @@ -15769,6 +15771,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); DeclContext *CurrContext = SemaRef.CurContext; Scope *CurScope = SemaRef.getCurScope(); @@ -15785,7 +15788,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SmallVector> OriginalInits; unsigned NumLoops; - // TODO: Support looprange clause using LoopSeqSize unsigned LoopSeqSize; if (!checkTransformableLoopSequence(OMPD_fuse, 
AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, @@ -15794,10 +15796,67 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, } // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder (0) + // because a dependent context could prevent determining its true value if (CurrContext->isDependentContext()) { return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, - NumLoops, 1, AStmt, nullptr, nullptr); + NumLoops, 0, AStmt, nullptr, nullptr); } + + // Handle clauses, which can be any of the following: [looprange, apply] + const OMPLoopRangeClause *LRC = + OMPExecutableDirective::getSingleClause(Clauses); + + // The clause arguments are invalidated if any error arises + // such as non-constant or non-positive arguments + if (LRC && (!LRC->getFirst() || !LRC->getCount())) + return StmtError(); + + // Delayed semantic check of LoopRange constraint + // Evaluates the loop range arguments and returns the first and count values + auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count, + uint64_t &FirstVal, + uint64_t &CountVal) { + llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context); + llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context); + FirstVal = FirstInt.getZExtValue(); + CountVal = CountInt.getZExtValue(); + }; + + // Checks if the loop range is valid + auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, + unsigned NumLoops) -> bool { + return FirstVal + CountVal - 1 <= NumLoops; + }; + uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + + if (LRC) { + EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, + CountVal); + if (CountVal == 1) + SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) { + SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) + << getOpenMPDirectiveName(OMPD_fuse) 
<< (FirstVal + CountVal - 1) + << LoopSeqSize; + return StmtError(); + } + + LastVal = FirstVal + CountVal - 1; + } + + // Complete fusion generates a single canonical loop nest + // However looprange clause generates several loop nests + unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1; + + // Emit a warning for redundant loop fusion when the sequence contains only + // one loop. + if (LoopSeqSize == 1) + SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + assert(LoopHelpers.size() == LoopSeqSize && "Expecting loop iteration space dimensionality to match number of " "affected loops"); @@ -15811,8 +15870,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SmallVector PreInits; // Select the type with the largest bit width among all induction variables - QualType IVType = LoopHelpers[0].IterationVarRef->getType(); - for (unsigned int I = 1; I < LoopSeqSize; ++I) { + QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); + for (unsigned int I = FirstVal; I < LastVal; ++I) { QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { IVType = CurrentIVType; @@ -15861,20 +15920,21 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Process each single loop to generate and collect declarations // and statements for all helper expressions - for (unsigned int I = 0; I < LoopSeqSize; ++I) { + for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], PreInits); - auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", I); - auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", I); - auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", I); + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); + auto [LBVD, LBDStmt] = 
CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); auto [NIVD, NIDStmt] = - CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", I, true); + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true); auto [IVVD, IVDStmt] = - CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", I); + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); if (!LBVD || !STVD || !NIVD || !IVVD) - return StmtError(); + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); UBVarDecls.push_back(UBVD); LBVarDecls.push_back(LBVD); @@ -15949,8 +16009,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // omp.fuse.max = max(omp.temp1, omp.temp0) ExprResult MaxExpr; - for (unsigned I = 0; I < LoopSeqSize; ++I) { - DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[I]); + // I is the position in the loop sequence; J indexes the helper variables + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]); QualType NITy = NIRef->getType(); if (MaxExpr.isUnset()) { @@ -15958,7 +16019,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, MaxExpr = NIRef; } else { // Create a new acummulator variable t_i = MaxExpr - std::string TempName = (Twine(".omp.temp.") + Twine(I)).str(); + std::string TempName = (Twine(".omp.temp.") + Twine(J)).str(); VarDecl *TempDecl = buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); TempDecl->setInit(MaxExpr.get()); @@ -15981,7 +16042,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!Comparison.isUsable()) return StmtError(); - DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[I]); + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]); // Update MaxExpr using a conditional expression to hold the max value MaxExpr = new (Context) ConditionalOperator( Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), @@ -16034,23 +16095,21 @@ StmtResult 
SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CompoundStmt *FusedBody = nullptr; SmallVector FusedBodyStmts; - for (unsigned I = 0; I < LoopSeqSize; ++I) { - + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { // Assingment of the original sub-loop index to compute the logical index // IV_k = LB_k + omp.fuse.index * ST_k - ExprResult IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, - MakeVarDeclRef(STVarDecls[I]), MakeIVRef()); + MakeVarDeclRef(STVarDecls[J]), MakeIVRef()); if (!IdxExpr.isUsable()) return StmtError(); IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, - MakeVarDeclRef(LBVarDecls[I]), IdxExpr.get()); + MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get()); if (!IdxExpr.isUsable()) return StmtError(); IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, - MakeVarDeclRef(IVVarDecls[I]), IdxExpr.get()); + MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get()); if (!IdxExpr.isUsable()) return StmtError(); @@ -16065,7 +16124,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, Stmt *Body = (isa(LoopStmts[I])) ? cast(LoopStmts[I])->getBody() : cast(LoopStmts[I])->getBody(); - BodyStmts.push_back(Body); CompoundStmt *CombinedBody = @@ -16073,7 +16131,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, SourceLocation(), SourceLocation()); ExprResult Condition = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), - MakeVarDeclRef(NIVarDecls[I])); + MakeVarDeclRef(NIVarDecls[J])); if (!Condition.isUsable()) return StmtError(); @@ -16094,8 +16152,26 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), IncrExpr.get()->getEndLoc()); + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. 
+ Stmt *FusionStmt = FusedForStmt; + if (LRC) { + SmallVector FinalLoops; + // Gather all the pre-fusion loops + for (unsigned I = 0; I < FirstVal - 1; ++I) + FinalLoops.push_back(LoopStmts[I]); + // Gather the fused loop + FinalLoops.push_back(FusedForStmt); + // Gather all the post-fusion loops + for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I) + FinalLoops.push_back(LoopStmts[I]); + FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + } return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, - 1, AStmt, FusedForStmt, + NumLoopNests, AStmt, FusionStmt, buildPreInits(Context, PreInits)); } @@ -17218,6 +17294,31 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr); } +OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( + Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) { + + // OpenMP [6.0, Restrictions] + // First and Count must be integer expressions with positive value + ExprResult FirstVal = + VerifyPositiveIntegerConstantInClause(First, OMPC_looprange); + if (FirstVal.isInvalid()) + First = nullptr; + + ExprResult CountVal = + VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange); + if (CountVal.isInvalid()) + Count = nullptr; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. 
+ // This check must be performed afterwards due to the delayed + // parsing and computation of the associated loop sequence + return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, + FirstLoc, CountLoc, EndLoc, First, Count); +} + OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 45f556f22c511..30204faf59b7b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1775,6 +1775,14 @@ class TreeTransform { LParenLoc, EndLoc); } + OMPClause * + RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPLoopRangeClause( + First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. 
@@ -10578,6 +10586,31 @@ TreeTransform::TransformOMPPartialClause(OMPPartialClause *C) { C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) { + ExprResult F = getDerived().TransformExpr(C->getFirst()); + if (F.isInvalid()) + return nullptr; + + ExprResult Cn = getDerived().TransformExpr(C->getCount()); + if (Cn.isInvalid()) + return nullptr; + + Expr *First = F.get(); + Expr *Count = Cn.get(); + + bool Changed = (First != C->getFirst()) || (Count != C->getCount()); + + // If no changes and AlwaysRebuild() is false, return the original clause + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + + return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(), + C->getLParenLoc(), C->getFirstLoc(), + C->getCountLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a3fbc3d25acab..d5e7c287c23a4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11089,6 +11089,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_partial: C = OMPPartialClause::CreateEmpty(Context); break; + case llvm::omp::OMPC_looprange: + C = OMPLoopRangeClause::CreateEmpty(Context); + break; case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -11490,6 +11493,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + C->setFirst(Record.readSubExpr()); + C->setCount(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); + C->setFirstLoc(Record.readSourceLocation()); + C->setCountLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); 
C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 291bd8ea4bf18..b424b5aa7b0c6 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -3616,7 +3616,9 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case STMT_OMP_FUSE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; - S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops); + unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops, + NumLoopNests); break; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index af7229d748872..c99ffab64c6e6 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7807,6 +7807,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + Record.AddStmt(C->getFirst()); + Record.AddStmt(C->getCount()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getFirstLoc()); + Record.AddSourceLocation(C->getCountLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp index 43ce815dab024..ac4f0d38a9c68 100644 --- a/clang/test/OpenMP/fuse_ast_print.cpp +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -271,6 +271,73 @@ void foo7() { } +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse 
looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +//PRINT-LABEL: void foo9( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9 +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C +template +void foo9() { + // PRINT: #pragma omp fuse looprange(F,C) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(F,C) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + + } +} + +// Also test instantiating the template. 
+void tfoo9() { + foo9<1, 2>(); +} + diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp index 6c1e21092da43..d9500bed3ce31 100644 --- a/clang/test/OpenMP/fuse_codegen.cpp +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -53,6 +53,18 @@ extern "C" void foo3() { } } +extern "C" void foo4() { + double arr[256]; + + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 64; ++k) body(k); + for(int c = 42; auto &&v: arr) body(c,v); + } +} + #endif // CHECK1-LABEL: define dso_local void @body( @@ -777,6 +789,157 @@ extern "C" void foo3() { // CHECK1-NEXT: ret void // // +// CHECK1-LABEL: define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2:.*]] +// CHECK1: [[FOR_COND2]]: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK1: [[FOR_BODY4]]: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK1: [[IF_THEN9]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK1-NEXT: br label %[[IF_END14]] +// CHECK1: [[IF_END14]]: +// CHECK1-NEXT: br label %[[FOR_INC15:.*]] +// CHECK1: [[FOR_INC15]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: [[FOR_END17]]: +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19:.*]] +// CHECK1: [[FOR_COND19]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK1: [[FOR_BODY21]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK1-NEXT: br label %[[FOR_INC22:.*]] +// CHECK1: [[FOR_INC22]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19]] +// CHECK1: [[FOR_END23]]: +// CHECK1-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @body( // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1259,6 +1422,157 @@ extern "C" void foo3() { // CHECK2-NEXT: ret void // // +// CHECK2-LABEL: define dso_local void @foo4( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca 
ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2:.*]] +// CHECK2: [[FOR_COND2]]: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK2: [[FOR_BODY4]]: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK2: [[IF_THEN9]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP23]]) +// CHECK2-NEXT: br label %[[IF_END14]] +// CHECK2: [[IF_END14]]: +// CHECK2-NEXT: br label %[[FOR_INC15:.*]] +// CHECK2: [[FOR_INC15]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: [[FOR_END17]]: +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19:.*]] +// CHECK2: [[FOR_COND19]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK2: [[FOR_BODY21]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK2-NEXT: br label %[[FOR_INC22:.*]] +// CHECK2: [[FOR_INC22]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19]] +// CHECK2: [[FOR_END23]]: +// CHECK2-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @tfoo2( // CHECK2-SAME: ) #[[ATTR0]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1494,7 +1808,7 @@ extern "C" void foo3() { // CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void // @@ -1503,9 +1817,13 @@ extern "C" void foo3() { // CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} // CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} //. // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} // CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} //. 
diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 50dedfd2c0dc6..2a2491d008a0b 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -33,6 +33,8 @@ void func() { { for (int i = 0; i < 7; ++i) ; + for(int j = 0; j < 100; ++j); + } @@ -41,6 +43,8 @@ void func() { { for (int i = 0; i < 7; ++i) ; + for(int j = 0; j < 100; ++j); + } //expected-error at +4 {{loop after '#pragma omp fuse' is not in canonical form}} @@ -50,6 +54,7 @@ void func() { for(int i = 0; i < 10; i*=2) { ; } + for(int j = 0; j < 100; ++j); } //expected-error at +2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} @@ -73,4 +78,109 @@ void func() { for(unsigned int j = 0; j < 10; ++j); for(long long k = 0; k < 100; ++k); } -} \ No newline at end of file + + //expected-warning at +2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + } + + //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(1, 1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, -1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, 0) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + const int x = 1; + constexpr int y = 4; + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(x,y) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j 
< 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(1,420) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +// In a template context, but expression itself not instantiation-dependent +template +static void templated_func() { + + //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(2,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(3,3) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + +} + +template +static void templated_func_value_dependent() { + + //expected-warning at +1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(V,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +template +static void templated_func_type_dependent() { + constexpr T s = 1; + + //expected-error at +1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(s,s-1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + + +void template_inst() { + // expected-note at +1 {{in instantiation of function template specialization 'templated_func' requested here}} + templated_func(); + // expected-note at +1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}} + 
templated_func_value_dependent<1>(); + // expected-note at +1 {{in instantiation of function template specialization 'templated_func_type_dependent' requested here}} + templated_func_type_dependent(); + +} + + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3afa59b2f2d6c..deb6fdd68e6d3 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2417,6 +2417,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) { Visitor->AddStmt(C->getFactor()); } +void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + Visitor->AddStmt(C->getFirst()); + Visitor->AddStmt(C->getCount()); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index de888ff86fe91..a2f258bbf745b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1233,6 +1233,15 @@ struct WriteT { using EmptyTrait = std::true_type; }; +// V6: [6.4.7] Looprange clause +template struct LoopRangeT { + using Begin = E; + using End = E; + + using TupleTrait = std::true_type; + std::tuple t; +}; + // --- template @@ -1263,9 +1272,10 @@ using TupleClausesT = DefaultmapT, DeviceT, DistScheduleT, DoacrossT, FromT, GrainsizeT, IfT, InitT, InReductionT, - LastprivateT, LinearT, MapT, - NumTasksT, OrderT, ReductionT, - ScheduleT, TaskReductionT, ToT>; + LastprivateT, LinearT, LoopRangeT, + MapT, NumTasksT, OrderT, + ReductionT, ScheduleT, + TaskReductionT, ToT>; template using UnionClausesT = std::variant>; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 6352be8069e9e..989b35a7caa2a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -274,6 +274,9 @@ def OMPC_Linear : 
Clause<[Spelling<"linear">]> { def OMPC_Link : Clause<[Spelling<"link">]> { let flangClass = "OmpObjectList"; } +def OMPC_LoopRange : Clause<[Spelling<"looprange">]> { + let clangClass = "OMPLoopRangeClause"; +} def OMPC_Map : Clause<[Spelling<"map">]> { let clangClass = "OMPMapClause"; let flangClass = "OmpMapClause"; @@ -856,6 +859,9 @@ def OMP_For : Directive<[Spelling<"for">]> { let languages = [L_C]; } def OMP_Fuse : Directive<[Spelling<"fuse">]> { + let allowedOnceClauses = [ + VersionedClause + ]; let association = AS_Loop; let category = CA_Executable; } >From c76888b9dd1f516512d2d64afa4766adaad4de1e Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:30:39 +0000 Subject: [PATCH 03/13] Added fuse to documentation --- clang/docs/OpenMPSupport.rst | 2 ++ clang/docs/ReleaseNotes.rst | 1 + 2 files changed, 3 insertions(+) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 986aaabe1eed4..12325e3294a64 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,6 +376,8 @@ implementation. 
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop fuse transformation | :good:`done` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | task_iteration | :none:`unclaimed` | :none:`unclaimed` | | diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index dd748ab06873d..c75cb25a4db73 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1128,6 +1128,7 @@ OpenMP Support - Fixed mapping of arrays of structs containing nested structs with user defined mappers, by using compiler-generated default mappers for the outer structs for such maps. +- Added support for 'omp fuse' directive. 
Improvements ^^^^^^^^^^^^ >From 1c054673983282e7e6afef0f11c2a7fbe39181d7 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:43:41 +0000 Subject: [PATCH 04/13] Refactored preinits handling and improved coverage --- clang/docs/OpenMPSupport.rst | 2 +- clang/include/clang/AST/StmtOpenMP.h | 5 +- clang/include/clang/Sema/SemaOpenMP.h | 96 +- clang/lib/AST/StmtOpenMP.cpp | 13 + clang/lib/Basic/OpenMPKinds.cpp | 3 +- clang/lib/CodeGen/CGExpr.cpp | 2 + clang/lib/CodeGen/CodeGenFunction.h | 4 + clang/lib/Sema/SemaOpenMP.cpp | 588 ++++--- clang/test/OpenMP/fuse_ast_print.cpp | 55 + clang/test/OpenMP/fuse_codegen.cpp | 2117 +++++++++++++++---------- 10 files changed, 1862 insertions(+), 1023 deletions(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 12325e3294a64..8d62208e55f75 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,7 +376,7 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ -| loop fuse transformation | :good:`done` | :none:`unclaimed` | | +| loop fuse transformation | :good:`prototyped` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | 
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index f5115afd0753e..6425f6616a558 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1005,8 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass || - C == OMPFuseDirectiveClass; + C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass; } }; @@ -5653,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_stripe, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + // Similar to Tile, it only generates a single top level loop nest + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 0c28aaf6ab21a..547ea95c6cd5d 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1494,16 +1494,96 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); - /// Analyzes and checks a loop sequence for use by a loop transformation + /// @brief Categories of loops encountered during semantic OpenMP loop + /// analysis + /// + /// This enumeration identifies the structural category of a loop or sequence + /// of loops analyzed in the context of OpenMP transformations and directives. + /// This categorization helps differentiate between original source loops + /// and the structures resulting from applying OpenMP loop transformations. 
+ enum class OMPLoopCategory { + + /// @var OMPLoopCategory::RegularLoop + /// Represents a standard canonical loop nest found in the + /// original source code or an intact loop after transformations + /// (i.e., the pre/post loops of a loopranged fusion) + RegularLoop, + + /// @var OMPLoopCategory::TransformSingleLoop + /// Represents the resulting loop structure when an OpenMP loop + /// transformation generates a single top-level loop + TransformSingleLoop, + + /// @var OMPLoopCategory::TransformLoopSequence + /// Represents the resulting loop structure when an OpenMP loop + /// transformation + /// generates a sequence of two or more canonical loop nests + TransformLoopSequence + }; + + /// The main recursive process of `checkTransformableLoopSequence` that + /// performs grammatical parsing of a canonical loop sequence. It extracts + /// key information, such as the number of top-level loops, loop statements, + /// helper expressions, and other relevant loop-related data, all in a single + /// execution to avoid redundant traversals. This analysis flattens inner + /// loop sequences. + /// + /// \param LoopSeqStmt The AST of the original statement. + /// \param LoopSeqSize [out] Number of top level canonical loops. + /// \param NumLoops [out] Number of total canonical loops (nested too). + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param ForStmts [out] The multiple Stmt of each For loop. 
+ /// \param OriginalInits [out] The raw original initialization statements + /// belonging to each loop of the loop sequence + /// \param TransformsPreInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bound to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. + /// \param Context + /// \param Kind The loop transformation directive kind. + /// \return Whether the original statement is both syntactically and + /// semantically correct according to OpenMP 6.0 canonical loop + /// sequence definition. + bool analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind); + + /// Validates and checks whether a loop sequence can be transformed according + /// to the given directive, providing necessary setup and initialization + /// (Driver function) before recursion using `analyzeLoopSequence`. /// /// \param Kind The loop transformation directive kind. - /// \param NumLoops [out] Number of total canonical loops - /// \param LoopSeqSize [out] Number of top level canonical loops + /// \param AStmt The AST of the original statement + /// \param LoopSeqSize [out] Number of top level canonical loops. 
+ /// \param NumLoops [out] Number of total canonical loops (nested too) /// \param LoopHelpers [out] The multiple loop analyses results. - /// \param LoopStmts [out] The multiple Stmt of each For loop. - /// \param OriginalInits [out] The multiple collection of statements and + /// \param ForStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformsPreInits [out] The multiple collection of statements and /// declarations that must have been executed/declared - /// before entering the loop. + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. /// \param Context /// \return Whether there was an absence of errors or not bool checkTransformableLoopSequence( @@ -1512,7 +1592,9 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, SmallVectorImpl> &OriginalInits, - ASTContext &Context); + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. 
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index c5a6732cc2217..f527e6361b5e5 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -457,6 +457,8 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); // The number of generated loops and loop nests during unroll matches + // because unroll only generates top-level canonical loop nests, + // so each generated loop is itself a top-level canonical loop nest. Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); @@ -520,6 +522,17 @@ OMPFuseDirective *OMPFuseDirective::Create( NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); + // The number of top-level canonical loop nests may + // differ from the total number of generated loops. + // Example: + // Before fusion: + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) + // A[i][j] = i + j; + // + // for (int k = 0; k < P; ++k) + // B[k] = k * 2; + // Here, NumLoopNests = 2, but NumLoops = 3. 
Dir->setNumGeneratedLoopNests(NumLoopNests); Dir->setNumGeneratedLoops(NumLoops); return Dir; diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 18330181f1509..53a9f80e6d3b7 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -704,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe || DKind == OMPD_fuse; + DKind == OMPD_interchange || DKind == OMPD_stripe || + DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6cb348ffdf55f..08049d4d4e37d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3257,6 +3257,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // No other cases for now. } else { + llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n"; + VD->dumpColor(); llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index fe753e5b688b1..bfe24213ed377 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5363,6 +5363,10 @@ class CodeGenFunction : public CodeGenTypeCache { /// Set the address of a local variable. 
void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { + if (LocalDeclMap.count(VD)) { + llvm::errs() << "Warning: VarDecl already exists in map: "; + VD->dumpColor(); + } assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3ec3f2ad31e78..3ce256f3ec23b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,6 +22,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -47,6 +48,7 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include +#include using namespace clang; using namespace llvm::omp; @@ -14194,6 +14196,45 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +// Overloaded base case function +template +static bool tryHandleAs(T *t, F &&) { + return false; +} + +/** + * Tries to recursively cast `t` to one of the given types and invokes `f` if successful. + * + * @tparam Class The first type to check. + * @tparam Rest The remaining types to check. + * @tparam T The base type of `t`. + * @tparam F The callable type for the function to invoke upon a successful cast. + * @param t The object to be checked. + * @param f The function to invoke if `t` matches `Class`. + * @return `true` if `t` matched any type and `f` was called, otherwise `false`. + */ +template +static bool tryHandleAs(T *t, F &&f) { + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } else { + return tryHandleAs(t, std::forward(f)); + } +} + +// Updates OriginalInits by checking Transform against loop transformation +// directives and appending their pre-inits if a match is found. 
+static void updatePreInits(OMPLoopBasedDirective *Transform, + SmallVectorImpl> &PreInits) { + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); +} + bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, @@ -14224,121 +14265,106 @@ bool SemaOpenMP::checkTransformableLoopNest( return false; }, [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + updatePreInits(Transform, OriginalInits); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } -class NestedLoopCounterVisitor - : public clang::RecursiveASTVisitor { +// Counts the total number of nested loops, including the outermost loop (the +// original loop). PRECONDITION of this visitor is that it must be invoked from +// the original loop to be analyzed. The traversal is stop for Decl's and +// Expr's given that they may contain inner loops that must not be counted. 
+// +// Example AST structure for the code: +// +// int main() { +// #pragma omp fuse +// { +// for (int i = 0; i < 100; i++) { <-- Outer loop +// []() { +// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +// }; +// for(int j = 0; j < 5; ++j) {} <-- Inner loop +// } +// for (int r = 0; i < 100; i++) { <-- Outer loop +// struct LocalClass { +// void bar() { +// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +// } +// }; +// for(int k = 0; k < 10; ++k) {} <-- Inner loop +// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +// } +// } +// } +// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { +private: + unsigned NestedLoopCount = 0; + public: - explicit NestedLoopCounterVisitor() : NestedLoopCount(0) {} + explicit NestedLoopCounterVisitor() {} - bool VisitForStmt(clang::ForStmt *FS) { - ++NestedLoopCount; - return true; + unsigned getNestedLoopCount() const { return NestedLoopCount; } + + bool VisitForStmt(ForStmt *FS) override { + ++NestedLoopCount; + return true; } - bool VisitCXXForRangeStmt(clang::CXXForRangeStmt *FRS) { - ++NestedLoopCount; - return true; + bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { + ++NestedLoopCount; + return true; } - unsigned getNestedLoopCount() const { return NestedLoopCount; } + bool TraverseStmt(Stmt *S) override { + if (!S) + return true; -private: - unsigned NestedLoopCount; + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. 
+ // Therefore must not be counted + if (isa(S)) + return true; + + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S) || isa(S) || + isa(S)) { + return DynamicRecursiveASTVisitor::TraverseStmt(S); + } + + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; + } + + bool TraverseDecl(Decl *D) override { + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; + } }; -bool SemaOpenMP::checkTransformableLoopSequence( - OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, - unsigned &NumLoops, +bool SemaOpenMP::analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, SmallVectorImpl> &OriginalInits, - ASTContext &Context) { + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind) { - // Checks whether the given statement is a compound statement VarsWithInheritedDSAType TmpDSA; - if (!isa(AStmt)) { - Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) - << getOpenMPDirectiveName(Kind); - return false; - } - // Callback for updating pre-inits in case there are even more - // loop-sequence-generating-constructs inside of the main compound stmt - auto OnTransformationCallback = - [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) 
- DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); - }; - - // Number of top level canonical loop nests observed (And acts as index) - LoopSeqSize = 0; - // Number of total observed loops - NumLoops = 0; - - // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows - // the grammar: - // - // canonical-loop-sequence: - // { - // loop-sequence+ - // } - // where loop-sequence can be any of the following: - // 1. canonical-loop-sequence - // 2. loop-nest - // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) - // - // To recognise and traverse this structure the following helper functions - // have been defined. handleLoopSequence serves as the recurisve entry point - // and tries to match the input AST to the canonical loop sequence grammar - // structure - - // Helper functions to validate canonical loop sequence grammar is valid - auto isLoopSequenceDerivation = [](auto *Child) { - return isa(Child) || isa(Child) || - isa(Child); - }; - auto isLoopGeneratingStmt = [](auto *Child) { - return isa(Child); - }; - + QualType BaseInductionVarType; // Helper Lambda to handle storing initialization and body statements for both // ForStmt and CXXForRangeStmt and checks for any possible mismatch between // induction variables types - QualType BaseInductionVarType; auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, this, &Context](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { @@ -14361,33 +14387,35 @@ bool SemaOpenMP::checkTransformableLoopSequence( } } } - } else { - assert(isa(LoopStmt) && - "Expected canonical for or range-based for loops."); - auto *CXXFor = dyn_cast(LoopStmt); + auto *CXXFor = cast(LoopStmt); OriginalInits.back().push_back(CXXFor->getBeginStmt()); ForStmts.push_back(CXXFor); } }; + // Helper lambda functions to encapsulate the processing of different // 
derivations of the canonical loop sequence grammar // // Modularized code for handling loop generation and transformations - auto handleLoopGeneration = [&storeLoopStatements, &LoopHelpers, - &OriginalInits, &LoopSeqSize, &NumLoops, Kind, - &TmpDSA, &OnTransformationCallback, - this](Stmt *Child) { + auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers, + &OriginalInits, &TransformsPreInits, + &LoopCategories, &LoopSeqSize, &NumLoops, Kind, + &TmpDSA, &ForStmts, &Context, + &LoopSequencePreInits, this](Stmt *Child) { auto LoopTransform = dyn_cast(Child); Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); - + unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); // Handle the case where transformed statement is not available due to // dependent contexts if (!TransformedStmt) { - if (NumGeneratedLoopNests > 0) + if (NumGeneratedLoopNests > 0) { + LoopSeqSize += NumGeneratedLoopNests; + NumLoops += NumGeneratedLoops; return true; - // Unroll full + } + // Unroll full (0 loops produced) else { Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); @@ -14400,38 +14428,56 @@ bool SemaOpenMP::checkTransformableLoopSequence( Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; - // Future loop transformations that generate multiple canonical loops - } else if (NumGeneratedLoopNests > 1) { - llvm_unreachable("Multiple canonical loop generating transformations " - "like loop splitting are not yet supported"); } + // Loop transformatons such as split or loopranged fuse + else if (NumGeneratedLoopNests > 1) { + // Get the preinits related to this loop sequence generating + // loop transformation (i.e loopranged fuse, split...) 
+ LoopSequencePreInits.emplace_back(); + // These preinits differ slightly from regular inits/pre-inits related + // to single loop generating loop transformations (interchange, unroll) + // given that they are not bounded to a particular loop nest + // so they need to be treated independently + updatePreInits(LoopTransform, LoopSequencePreInits); + return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops, + LoopHelpers, ForStmts, OriginalInits, + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context, Kind); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + else { + // Process the transformed loop statement + OriginalInits.emplace_back(); + TransformsPreInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, + *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + storeLoopStatements(TransformedStmt); + updatePreInits(LoopTransform, TransformsPreInits); - // Process the transformed loop statement - Child = TransformedStmt; - OriginalInits.emplace_back(); - LoopHelpers.emplace_back(); - OnTransformationCallback(LoopTransform); - - unsigned IsCanonical = - checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, - TmpDSA, LoopHelpers[LoopSeqSize]); - - if (!IsCanonical) { - Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) - << getOpenMPDirectiveName(Kind); - return false; + NumLoops += NumGeneratedLoops; + ++LoopSeqSize; + return true; } - storeLoopStatements(TransformedStmt); - NumLoops += LoopTransform->getNumGeneratedLoops(); - return true; }; // Modularized code for handling regular canonical loops - auto handleRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, 
&NumLoops, Kind, &TmpDSA, - this](Stmt *Child) { + auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, + &LoopSeqSize, &NumLoops, Kind, &TmpDSA, + &LoopCategories, this](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::RegularLoop); + unsigned IsCanonical = checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); @@ -14449,57 +14495,114 @@ bool SemaOpenMP::checkTransformableLoopSequence( return true; }; - // Helper function to process a Loop Sequence Recursively - auto handleLoopSequence = [&](Stmt *LoopSeqStmt, - auto &handleLoopSequenceCallback) -> bool { - for (auto *Child : LoopSeqStmt->children()) { - if (!Child) - continue; + // Helper functions to validate canonical loop sequence grammar is valid + auto isLoopSequenceDerivation = [](auto *Child) { + return isa(Child) || isa(Child) || + isa(Child); + }; + auto isLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + - // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { - Child = Child->IgnoreContainers(); + // High level grammar validation + for (auto *Child : LoopSeqStmt->children()) { - // Ignore empty compound statement if (!Child) - continue; + continue; - // In the case of a nested loop sequence ignoring containers would not - // be enough, a recurisve transversal of the loop sequence is required - if (isa(Child)) { - if (!handleLoopSequenceCallback(Child, handleLoopSequenceCallback)) - return false; - // Already been treated, skip this children - continue; + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required + if (isa(Child)) { + if 
(!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers, + ForStmts, OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { + if (isLoopGeneratingStmt(Child)) { + if (!analyzeLoopGeneration(Child)) { + return false; } + // analyzeLoopGeneration updates Loop Sequence size accordingly + + } else { + if (!analyzeRegularLoop(Child)) { + return false; + } + // Update the Loop Sequence size by one + ++LoopSeqSize; } - // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { - if (isLoopGeneratingStmt(Child)) { - if (!handleLoopGeneration(Child)) { - return false; - } } else { - if (!handleRegularLoop(Child)) { - return false; - } + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; } - ++LoopSeqSize; - } else { - // Report error for invalid statement inside canonical loop sequence - Diag(Child->getBeginLoc(), diag::err_omp_not_for) - << 0 << getOpenMPDirectiveName(Kind); + } + return true; +} + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context) { + + // Checks whether the given statement is a compound statement + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); return false; - } - } - return true; - }; + } + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 
0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. analyzeLoopSequence serves as the recurisve entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure. This function will perform both a semantic and syntactical + // analysis of the given statement according to OpenMP 6.0 definition of + // the aforementioned canonical loop sequence // Recursive entry point to process the main loop sequence - if (!handleLoopSequence(AStmt, handleLoopSequence)) { - return false; + if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, + OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) { + return false; } - if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) << getOpenMPDirectiveName(Kind); @@ -14531,9 +14634,7 @@ static void addLoopPreInits(ASTContext &Context, RangeEnd->getBeginLoc(), RangeEnd->getEndLoc())); } - llvm::append_range(PreInits, OriginalInit); - // List of OMPCapturedExprDecl, for __begin, __end, and NumIterations if (auto *PI = cast_or_null(LoopHelper.PreInits)) { PreInits.push_back(new (Context) DeclStmt( @@ -15214,7 +15315,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *LoopStmt = nullptr; collectLoopStmts(AStmt, {LoopStmt}); - // Determine the PreInit declarations. 
+ // Determine the PreInit declarations.e SmallVector PreInits; addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); @@ -15781,28 +15882,35 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!AStmt) { return StmtError(); } + + unsigned NumLoops = 1; + unsigned LoopSeqSize = 1; + + // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder 1 (even though + // using looprange fuse could yield up to 3 top level loop nests) + // because a dependent context could prevent determining its true value + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, LoopSeqSize, AStmt, nullptr, + nullptr); + } + // Validate that the potential loop sequence is transformable for fusion // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops SmallVector LoopHelpers; SmallVector LoopStmts; SmallVector> OriginalInits; - - unsigned NumLoops; - unsigned LoopSeqSize; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; + SmallVector LoopCategories; if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, - Context)) { + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context)) { return StmtError(); } - // Defer transformation in dependent contexts - // The NumLoopNests argument is set to a placeholder (0) - // because a dependent context could prevent determining its true value - if (CurrContext->isDependentContext()) { - return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, - NumLoops, 0, AStmt, nullptr, nullptr); - } - // Handle clauses, which can be any of the following: [looprange, apply] const OMPLoopRangeClause *LRC = OMPExecutableDirective::getSingleClause(Clauses); @@ -15864,11 +15972,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, "Expecting loop iteration space dimensionality to match 
number of " "affected loops"); - // PreInits hold a sequence of variable declarations that must be executed - // before the fused loop begins. These include bounds, strides, and other - // helper variables required for the transformation. - SmallVector PreInits; - // Select the type with the largest bit width among all induction variables QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); for (unsigned int I = FirstVal; I < LastVal; ++I) { @@ -15880,7 +15983,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, uint64_t IVBitWidth = Context.getIntWidth(IVType); // Create pre-init declarations for all loops lower bounds, upper bounds, - // strides and num-iterations + // strides and num-iterations for every top level loop in the fusion SmallVector LBVarDecls; SmallVector STVarDecls; SmallVector NIVarDecls; @@ -15918,12 +16021,62 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, return std::make_pair(VD, DeclStmt); }; + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. Other loop transforms + // also contain their own preinits + SmallVector PreInits; + // Iterator to keep track of loop transformations + unsigned int TransformIndex = 0; + + // Update the general preinits using the preinits generated by loop sequence + // generating loop transformations. These preinits differ slightly from + // single-loop transformation preinits, as they can be detached from a + // specific loop inside the multiple generated loop nests. This happens + // because certain helper variables, like '.omp.fuse.max', are introduced to + // handle fused iteration spaces and may not be directly tied to a single + // original loop. the preinit structure must ensure that hidden variables + // like '.omp.fuse.max' are still properly handled. 
+ // Transformations that apply this concept: Loopranged Fuse, Split + if (!LoopSequencePreInits.empty()) { + for (const auto <PreInits : LoopSequencePreInits) { + if (!LTPreInits.empty()) { + llvm::append_range(PreInits, LTPreInits); + } + } + } + // Process each single loop to generate and collect declarations - // and statements for all helper expressions + // and statements for all helper expressions related to + // particular single loop nests + + // Also In the case of the fused loops, we keep track of their original + // inits by appending them to their preinits statement, and in the case of + // transformations, also append their preinits (which contain the original + // loop initialization statement or other statements) + + // Firstly we need to update TransformIndex to match the begining of the + // looprange section + for (unsigned int I = 0; I < FirstVal - 1; ++I) { + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + } for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { - addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], - PreInits); + if (LoopCategories[I] == OMPLoopCategory::RegularLoop) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + // For transformed loops, insert both pre-inits and original inits. + // Order matters: pre-inits may define variables used in the original + // inits such as upper bounds... 
+ auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); @@ -15942,7 +16095,6 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, NIVarDecls.push_back(NIVD); IVVarDecls.push_back(IVVD); - PreInits.push_back(UBDStmt.get()); PreInits.push_back(LBDStmt.get()); PreInits.push_back(STDStmt.get()); PreInits.push_back(NIDStmt.get()); @@ -16118,6 +16270,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, BodyStmts.push_back(IdxExpr.get()); llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + // If the loop is a CXXForRangeStmt then the iterator variable is needed if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); @@ -16152,21 +16305,50 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), IncrExpr.get()->getEndLoc()); - // In the case of looprange, the result of fuse won't simply - // be a single loop (ForStmt), but rather a loop sequence - // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop - // and the post-fusion loops, preserving its original order. + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. 
+ // + // Note: If looprange clause produces a single fused loop nest then + // this compound statement wrapper is unnecessary (Therefore this + // treatment is skipped) + Stmt *FusionStmt = FusedForStmt; - if (LRC) { + if (LRC && CountVal != LoopSeqSize) { SmallVector FinalLoops; - // Gather all the pre-fusion loops - for (unsigned I = 0; I < FirstVal - 1; ++I) - FinalLoops.push_back(LoopStmts[I]); - // Gather the fused loop - FinalLoops.push_back(FusedForStmt); - // Gather all the post-fusion loops - for (unsigned I = FirstVal + CountVal - 1; I < LoopSeqSize; ++I) + // Reset the transform index + TransformIndex = 0; + + // Collect all non-fused loops before and after the fused region. + // Pre-fusion and post-fusion loops are inserted in order exploiting their + // symmetry, along with their corresponding transformation pre-inits if + // needed. The fused loop is added between the two regions. + for (unsigned I = 0; I < LoopSeqSize; ++I) { + if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) { + // Update the Transformation counter to skip already treated + // loop transformations + if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + continue; + } + + // No need to handle: + // Regular loops: they are kept intact as-is. + // Loop-sequence-generating transformations: already handled earlier. 
+ // Only TransformSingleLoop requires inserting pre-inits here + + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + } + FinalLoops.push_back(LoopStmts[I]); + } + + FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt); FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), SourceLocation(), SourceLocation()); } diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp index ac4f0d38a9c68..9d85bd1172948 100644 --- a/clang/test/OpenMP/fuse_ast_print.cpp +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -338,6 +338,61 @@ void tfoo9() { foo9<1, 2>(); } +// PRINT-LABEL: void foo10( +// DUMP-LABEL: FunctionDecl {{.*}} foo10 +void foo10() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int ii = 0; ii < 10; ii += 2) + // DUMP: ForStmt + for (int ii = 0; ii < 10; ii += 2) + // PRINT: body(ii) + // DUMP: CallExpr + body(ii); + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + { + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int jj = 10; jj > 0; --jj) + // DUMP: ForStmt + for (int jj = 10; jj > 0; --jj) + // PRINT: body(jj) + // DUMP: CallExpr + body(jj); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + // PRINT: for (int kk = 0; kk <= 10; ++kk) + // DUMP: ForStmt 
+ for (int kk = 0; kk <= 10; ++kk) + // PRINT: body(kk) + // DUMP: CallExpr + body(kk); + } + } + +} diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp index d9500bed3ce31..742c280ed0172 100644 --- a/clang/test/OpenMP/fuse_codegen.cpp +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -65,6 +65,23 @@ extern "C" void foo4() { } } +// This exemplifies the usage of loop transformations that generate +// more than top level canonical loop nests (e.g split, loopranged fuse...) +extern "C" void foo5() { + double arr[256]; + #pragma omp fuse looprange(2,2) + { + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 512; ++k) body(k); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + #endif // CHECK1-LABEL: define dso_local void @body( @@ -88,7 +105,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -97,7 +113,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -129,107 +144,103 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: 
store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: 
[[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: br i1 [[CMP17]], label 
%[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] // CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP35]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] // CHECK1: [[IF_THEN22]]: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] // CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 
[[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] // CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]]) // CHECK1-NEXT: br label %[[IF_END27]] // CHECK1: [[IF_END27]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK1: [[FOR_END]]: @@ -256,7 +267,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -265,7 +275,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -274,7 +283,6 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTNEW_STEP21:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 @@ -304,172 +312,166 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK1-NEXT: 
[[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// 
CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK1-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] // CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 // CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP]], label 
%[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK1: [[COND_TRUE]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK1-NEXT: br label %[[COND_END:.*]] // CHECK1: [[COND_FALSE]]: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK1-NEXT: br label %[[COND_END]] // CHECK1: [[COND_END]]: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] // CHECK1: [[COND_TRUE30]]: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 // CHECK1-NEXT: br label %[[COND_END32:.*]] // CHECK1: [[COND_FALSE31]]: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 // CHECK1-NEXT: br label %[[COND_END32]] // CHECK1: [[COND_END32]]: -// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] // CHECK1-NEXT: store i32 [[COND33]], ptr 
[[DOTOMP_FUSE_MAX]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] -// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 -// 
CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] -// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] // CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] // CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] // CHECK1: [[IF_THEN40]]: -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 
4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] // CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] -// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] // CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]]) +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP58]]) // CHECK1-NEXT: br label %[[IF_END45]] // CHECK1: [[IF_END45]]: -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] // CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] // CHECK1: [[IF_THEN47]]: -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] -// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] // CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] -// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 
[[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] // CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]]) // CHECK1-NEXT: br label %[[IF_END52]] // CHECK1: [[IF_END52]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: [[FOR_END]]: @@ -481,13 +483,11 @@ extern "C" void foo4() { // CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -497,48 +497,43 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[__END224:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, 
align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 @@ -565,225 +560,219 @@ extern "C" void foo4() { // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 // CHECK1-NEXT: store i32 42, ptr [[C]], align 4 // CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 // CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 -// 
CHECK1-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 // CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 -// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 -// CHECK1-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 -// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 -// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK1-NEXT: [[ADD21:%.*]] 
= add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 // CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 -// CHECK1-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 -// CHECK1-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 
-// CHECK1-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 -// CHECK1-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK1-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] -// CHECK1-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 -// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 -// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 -// CHECK1-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 -// CHECK1-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 -// CHECK1-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 
+// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK1-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 -// CHECK1-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] -// CHECK1: [[COND_TRUE44]]: -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK1-NEXT: br label %[[COND_END46:.*]] -// CHECK1: [[COND_FALSE45]]: -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: br label %[[COND_END46]] -// CHECK1: [[COND_END46]]: -// CHECK1-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] -// CHECK1-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: 
[[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] -// CHECK1: [[COND_TRUE50]]: -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK1-NEXT: br label %[[COND_END52:.*]] -// CHECK1: [[COND_FALSE51]]: -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: br label %[[COND_END52]] -// CHECK1: [[COND_END52]]: -// CHECK1-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] -// CHECK1-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK1: [[COND_TRUE42]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: br label %[[COND_END44:.*]] +// CHECK1: [[COND_FALSE43]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END44]] +// CHECK1: [[COND_END44]]: +// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 
8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK1: [[COND_TRUE48]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50:.*]] +// CHECK1: [[COND_FALSE49]]: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50]] +// CHECK1: [[COND_END50]]: +// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK1-NEXT: br label %[[FOR_COND:.*]] // CHECK1: [[FOR_COND]]: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK1-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK1: [[FOR_BODY]]: -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK1-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 
[[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] // CHECK1: [[IF_THEN]]: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 -// CHECK1-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 -// CHECK1-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] -// CHECK1-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] -// CHECK1-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 -// CHECK1-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 -// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 -// CHECK1-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] -// CHECK1-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK1-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] -// CHECK1: [[IF_THEN64]]: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] -// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] -// CHECK1-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK1-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 -// CHECK1-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] -// CHECK1-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 -// CHECK1-NEXT: 
[[TMP48:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP48]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN62]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]]) // CHECK1-NEXT: br label %[[IF_END]] // CHECK1: [[IF_END]]: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK1-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] -// CHECK1: [[IF_THEN70]]: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] -// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] -// CHECK1-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK1-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 -// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] -// CHECK1-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) -// CHECK1-NEXT: br label %[[IF_END75]] -// CHECK1: [[IF_END75]]: -// CHECK1-NEXT: br label %[[IF_END76]] -// CHECK1: [[IF_END76]]: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK1-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] -// CHECK1: [[IF_THEN78]]: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] -// CHECK1-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] -// CHECK1-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 -// CHECK1-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 -// CHECK1-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] -// CHECK1-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) -// CHECK1-NEXT: br label %[[IF_END83]] -// CHECK1: [[IF_END83]]: -// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK1-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] -// CHECK1-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] -// CHECK1: [[IF_THEN85]]: -// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] -// CHECK1-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] -// CHECK1-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 -// CHECK1-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 -// CHECK1-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] -// CHECK1-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 -// CHECK1-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 -// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) -// CHECK1-NEXT: br label %[[IF_END90]] -// CHECK1: [[IF_END90]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK1: [[IF_THEN68]]: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END73]] +// CHECK1: [[IF_END73]]: +// CHECK1-NEXT: br label %[[IF_END74]] +// CHECK1: [[IF_END74]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK1: [[IF_THEN76]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK1-NEXT: br label %[[IF_END81]] +// CHECK1: [[IF_END81]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK1: [[IF_THEN83]]: +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK1-NEXT: br label %[[IF_END88]] +// CHECK1: [[IF_END88]]: // CHECK1-NEXT: br label %[[FOR_INC:.*]] // CHECK1: [[FOR_INC]]: -// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 -// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: [[FOR_END]]: // CHECK1-NEXT: ret void @@ -794,13 +783,11 @@ extern "C" void foo4() { // CHECK1-NEXT: [[ENTRY:.*:]] // CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -815,12 +802,10 @@ extern "C" void foo4() { // CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK1-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB1]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 @@ -940,6 +925,277 @@ extern "C" void foo4() { // CHECK1-NEXT: ret void // // +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], 
%[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK1: [[COND_TRUE24]]: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: br label %[[COND_END26:.*]] +// CHECK1: [[COND_FALSE25]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END26]] +// CHECK1: [[COND_END26]]: +// CHECK1-NEXT: [[COND27:%.*]] = 
phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30:.*]] +// CHECK1: [[FOR_COND30]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK1: [[FOR_BODY32]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV35:%.*]] = sext 
i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN41]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP37]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[IF_END53]] +// CHECK1: [[IF_END53]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK1: [[IF_THEN55]]: +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END60]] +// CHECK1: [[IF_END60]]: +// CHECK1-NEXT: br label %[[FOR_INC61:.*]] +// CHECK1: [[FOR_INC61]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: [[FOR_END63]]: +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70:.*]] +// CHECK1: [[FOR_COND70]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK1: [[FOR_BODY72]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK1-NEXT: br label %[[FOR_INC73:.*]] +// CHECK1: [[FOR_INC73]]: +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70]] +// CHECK1: [[FOR_END74]]: +// CHECK1-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @body( // CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -961,7 +1217,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -970,7 +1225,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1002,107 +1256,103 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// 
CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[END2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK2-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK2: [[COND_TRUE]]: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK2-NEXT: 
br label %[[COND_END:.*]] // CHECK2: [[COND_FALSE]]: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK2-NEXT: br label %[[COND_END]] // CHECK2: [[COND_END]]: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], %[[COND_TRUE]] ], [ [[TMP24]], %[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul 
i32 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP29]], [[MUL]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP32]], [[MUL19]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] // CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP35]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP33]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] // CHECK2: [[IF_THEN22]]: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP39]], [[TMP40]] -// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP38]], [[MUL23]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] // CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP41]], [[MUL25]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 
[[TMP40]], [[TMP41]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] // CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]]) // CHECK2-NEXT: br label %[[IF_END27]] // CHECK2: [[IF_END27]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP45]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK2: [[FOR_END]]: @@ -1114,13 +1364,11 @@ extern "C" void foo4() { // CHECK2-NEXT: [[ENTRY:.*:]] // CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1130,48 +1378,43 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB03:%.*]] = alloca i32, align 4 -// 
CHECK2-NEXT: [[DOTOMP_LB04:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_ST05:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_NI06:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV07:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB117:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_LB118:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_ST119:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_NI120:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV122:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[__RANGE223:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[__END224:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[__BEGIN227:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, 
align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_31:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_32:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_TEMP_142:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_FUSE_MAX48:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX54:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 @@ -1198,225 +1441,219 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr 
[[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB03]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB04]], align 4 -// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST05]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 -// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI06]], align 8 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 // CHECK2-NEXT: store i32 42, ptr [[C]], align 4 // CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 // CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 // CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY8:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY8]], ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP11]], i64 0, i64 0 -// 
CHECK2-NEXT: store ptr [[ARRAYDECAY10]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_11]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 // CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 -// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i64 [[SUB13]], 1 -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[ADD14]], 1 -// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i64 [[DIV15]], 1 -// CHECK2-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB117]], align 8 -// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB118]], align 8 -// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST119]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_12]], align 8 -// CHECK2-NEXT: [[ADD21:%.*]] 
= add nsw i64 [[TMP16]], 1 -// CHECK2-NEXT: store i64 [[ADD21]], ptr [[DOTOMP_NI120]], align 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 // CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 -// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY25:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 -// CHECK2-NEXT: [[ADD_PTR26:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY25]], i64 256 -// CHECK2-NEXT: store ptr [[ADD_PTR26]], ptr [[__END224]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP18]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__RANGE223]], align 8 -// CHECK2-NEXT: [[ARRAYDECAY30:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP19]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[ARRAYDECAY30]], ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[__END224]], align 8 -// CHECK2-NEXT: store ptr [[TMP20]], ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_31]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 
-// CHECK2-NEXT: [[SUB_PTR_LHS_CAST33:%.*]] = ptrtoint ptr [[TMP21]] to i64 -// CHECK2-NEXT: [[SUB_PTR_RHS_CAST34:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK2-NEXT: [[SUB_PTR_SUB35:%.*]] = sub i64 [[SUB_PTR_LHS_CAST33]], [[SUB_PTR_RHS_CAST34]] -// CHECK2-NEXT: [[SUB_PTR_DIV36:%.*]] = sdiv exact i64 [[SUB_PTR_SUB35]], 8 -// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[SUB_PTR_DIV36]], 1 -// CHECK2-NEXT: [[ADD38:%.*]] = add nsw i64 [[SUB37]], 1 -// CHECK2-NEXT: [[DIV39:%.*]] = sdiv i64 [[ADD38]], 1 -// CHECK2-NEXT: [[SUB40:%.*]] = sub nsw i64 [[DIV39]], 1 -// CHECK2-NEXT: store i64 [[SUB40]], ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB2]], align 8 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 
+// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_32]], align 8 -// CHECK2-NEXT: [[ADD41:%.*]] = add nsw i64 [[TMP24]], 1 -// CHECK2-NEXT: store i64 [[ADD41]], ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK2-NEXT: store i64 [[TMP25]], ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: [[CMP43:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] -// CHECK2-NEXT: br i1 [[CMP43]], label %[[COND_TRUE44:.*]], label %[[COND_FALSE45:.*]] -// CHECK2: [[COND_TRUE44]]: -// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_TEMP_142]], align 8 -// CHECK2-NEXT: br label %[[COND_END46:.*]] -// CHECK2: [[COND_FALSE45]]: -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: br label %[[COND_END46]] -// CHECK2: [[COND_END46]]: -// CHECK2-NEXT: [[COND47:%.*]] = phi i64 [ [[TMP28]], %[[COND_TRUE44]] ], [ [[TMP29]], %[[COND_FALSE45]] ] -// CHECK2-NEXT: store i64 [[COND47]], ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: 
[[TMP31:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[CMP49:%.*]] = icmp sgt i64 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: br i1 [[CMP49]], label %[[COND_TRUE50:.*]], label %[[COND_FALSE51:.*]] -// CHECK2: [[COND_TRUE50]]: -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 -// CHECK2-NEXT: br label %[[COND_END52:.*]] -// CHECK2: [[COND_FALSE51]]: -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: br label %[[COND_END52]] -// CHECK2: [[COND_END52]]: -// CHECK2-NEXT: [[COND53:%.*]] = phi i64 [ [[TMP32]], %[[COND_TRUE50]] ], [ [[TMP33]], %[[COND_FALSE51]] ] -// CHECK2-NEXT: store i64 [[COND53]], ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK2: [[COND_TRUE42]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: br label %[[COND_END44:.*]] +// CHECK2: [[COND_FALSE43]]: +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END44]] +// CHECK2: [[COND_END44]]: +// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 
8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK2: [[COND_TRUE48]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50:.*]] +// CHECK2: [[COND_FALSE49]]: +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50]] +// CHECK2: [[COND_END50]]: +// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX48]], align 8 -// CHECK2-NEXT: [[CMP55:%.*]] = icmp slt i64 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: br i1 [[CMP55]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_NI06]], align 8 -// CHECK2-NEXT: [[CMP56:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: br i1 [[CMP56]], label %[[IF_THEN:.*]], label %[[IF_END76:.*]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 
[[TMP33]], [[TMP34]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_LB04]], align 4 -// CHECK2-NEXT: [[CONV57:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_ST05]], align 4 -// CHECK2-NEXT: [[CONV58:%.*]] = sext i32 [[TMP39]] to i64 -// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV58]], [[TMP40]] -// CHECK2-NEXT: [[ADD59:%.*]] = add nsw i64 [[CONV57]], [[MUL]] -// CHECK2-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD59]] to i32 -// CHECK2-NEXT: store i32 [[CONV60]], ptr [[DOTOMP_IV07]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV07]], align 4 -// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i32 [[TMP41]], 1 -// CHECK2-NEXT: [[ADD62:%.*]] = add nsw i32 0, [[MUL61]] -// CHECK2-NEXT: store i32 [[ADD62]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP63:%.*]] = icmp slt i32 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: br i1 [[CMP63]], label %[[IF_THEN64:.*]], label %[[IF_END:.*]] -// CHECK2: [[IF_THEN64]]: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP45]], [[TMP46]] -// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 [[TMP44]], [[MUL65]] -// CHECK2-NEXT: store i32 [[ADD66]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[MUL67:%.*]] = mul nsw i32 [[TMP47]], 1 -// CHECK2-NEXT: [[ADD68:%.*]] = add nsw i32 0, [[MUL67]] -// CHECK2-NEXT: store i32 [[ADD68]], ptr [[I]], align 4 -// CHECK2-NEXT: 
[[TMP48:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP48]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN62]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP69:%.*]] = icmp slt i32 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: br i1 [[CMP69]], label %[[IF_THEN70:.*]], label %[[IF_END75:.*]] -// CHECK2: [[IF_THEN70]]: -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP52]], [[TMP53]] -// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 [[TMP51]], [[MUL71]] -// CHECK2-NEXT: store i32 [[ADD72]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP54]], 2 -// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i32 0, [[MUL73]] -// CHECK2-NEXT: store i32 [[ADD74]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP55]]) -// CHECK2-NEXT: br label %[[IF_END75]] -// CHECK2: [[IF_END75]]: -// CHECK2-NEXT: br label %[[IF_END76]] -// CHECK2: [[IF_END76]]: -// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_NI120]], align 8 -// CHECK2-NEXT: [[CMP77:%.*]] = icmp slt i64 [[TMP56]], [[TMP57]] -// CHECK2-NEXT: br i1 [[CMP77]], label %[[IF_THEN78:.*]], label %[[IF_END83:.*]] -// CHECK2: [[IF_THEN78]]: -// CHECK2-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB118]], align 8 -// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_ST119]], align 8 -// CHECK2-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], [[TMP60]] -// CHECK2-NEXT: [[ADD80:%.*]] = add nsw i64 [[TMP58]], [[MUL79]] -// CHECK2-NEXT: store i64 [[ADD80]], ptr [[DOTOMP_IV122]], align 8 -// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV122]], align 8 -// CHECK2-NEXT: [[MUL81:%.*]] = mul nsw i64 [[TMP62]], 1 -// CHECK2-NEXT: [[ADD_PTR82:%.*]] = getelementptr inbounds double, ptr [[TMP61]], i64 [[MUL81]] -// CHECK2-NEXT: store ptr [[ADD_PTR82]], ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 -// CHECK2-NEXT: store ptr [[TMP63]], ptr [[V]], align 8 -// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[C]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[V]], align 8 -// CHECK2-NEXT: [[TMP66:%.*]] = load double, ptr [[TMP65]], align 8 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP64]], double noundef [[TMP66]]) -// CHECK2-NEXT: br label %[[IF_END83]] -// CHECK2: [[IF_END83]]: -// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 -// CHECK2-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP67]], [[TMP68]] -// CHECK2-NEXT: br i1 [[CMP84]], label %[[IF_THEN85:.*]], label %[[IF_END90:.*]] -// CHECK2: [[IF_THEN85]]: -// CHECK2-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 -// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 -// CHECK2-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], [[TMP71]] -// CHECK2-NEXT: [[ADD87:%.*]] = add nsw i64 [[TMP69]], [[MUL86]] -// CHECK2-NEXT: store i64 [[ADD87]], ptr [[DOTOMP_IV2]], align 8 -// CHECK2-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 -// CHECK2-NEXT: [[TMP73:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 -// CHECK2-NEXT: [[MUL88:%.*]] = mul nsw i64 [[TMP73]], 1 -// CHECK2-NEXT: [[ADD_PTR89:%.*]] = getelementptr inbounds double, ptr [[TMP72]], i64 [[MUL88]] -// CHECK2-NEXT: store ptr [[ADD_PTR89]], ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: [[TMP74:%.*]] = load ptr, ptr [[__BEGIN227]], align 8 -// CHECK2-NEXT: store ptr [[TMP74]], ptr [[VV]], align 8 -// CHECK2-NEXT: [[TMP75:%.*]] = load i32, ptr [[CC]], align 4 -// CHECK2-NEXT: [[TMP76:%.*]] = load ptr, ptr [[VV]], align 8 -// CHECK2-NEXT: [[TMP77:%.*]] = load double, ptr [[TMP76]], align 8 -// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP75]], double noundef [[TMP77]]) -// CHECK2-NEXT: br label %[[IF_END90]] -// CHECK2: [[IF_END90]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK2: [[IF_THEN68]]: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END73]] +// CHECK2: [[IF_END73]]: +// CHECK2-NEXT: br label %[[IF_END74]] +// CHECK2: [[IF_END74]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK2: [[IF_THEN76]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK2-NEXT: br label %[[IF_END81]] +// CHECK2: [[IF_END81]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK2: [[IF_THEN83]]: +// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK2-NEXT: br label %[[IF_END88]] +// CHECK2: [[IF_END88]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX54]], align 8 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP78]], 1 -// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX54]], align 8 +// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 // CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void @@ -1427,13 +1664,11 @@ extern "C" void foo4() { // CHECK2-NEXT: [[ENTRY:.*:]] // CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1448,12 +1683,10 @@ extern "C" void foo4() { // CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: store i32 127, ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 // CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[K]], align 4 -// CHECK2-NEXT: store i32 63, ptr 
[[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 // CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 @@ -1573,6 +1806,277 @@ extern "C" void foo4() { // CHECK2-NEXT: ret void // // +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// 
CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK2: [[COND_TRUE24]]: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: br label %[[COND_END26:.*]] +// CHECK2: [[COND_FALSE25]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END26]] +// CHECK2: 
[[COND_END26]]: +// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30:.*]] +// CHECK2: [[FOR_COND30]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK2: [[FOR_BODY32]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN41]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP37]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[IF_END53]] +// CHECK2: [[IF_END53]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK2: [[IF_THEN55]]: +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END60]] +// CHECK2: [[IF_END60]]: +// CHECK2-NEXT: br label %[[FOR_INC61:.*]] +// CHECK2: [[FOR_INC61]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: [[FOR_END63]]: +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70:.*]] +// CHECK2: [[FOR_COND70]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK2: [[FOR_BODY72]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK2-NEXT: br label %[[FOR_INC73:.*]] +// CHECK2: [[FOR_INC73]]: +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70]] +// CHECK2: [[FOR_END74]]: +// CHECK2-NEXT: ret void +// +// // CHECK2-LABEL: define dso_local void @tfoo2( // CHECK2-SAME: ) #[[ATTR0]] { // CHECK2-NEXT: [[ENTRY:.*:]] @@ -1593,7 +2097,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 @@ -1602,7 +2105,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 @@ -1611,7 +2113,6 @@ extern "C" void foo4() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 @@ -1641,174 +2142,168 @@ extern 
"C" void foo4() { // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB0]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr 
[[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP17]] +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] // CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 // CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], 
align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[START_ADDR]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[END_ADDR]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP29]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP30]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 
[[SUB24]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] // CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 // CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_UB2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 -// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP32]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] // CHECK2: [[COND_TRUE]]: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 // CHECK2-NEXT: br label %[[COND_END:.*]] // CHECK2: [[COND_FALSE]]: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] 
= load i32, ptr [[DOTOMP_NI1]], align 4 // CHECK2-NEXT: br label %[[COND_END]] // CHECK2: [[COND_END]]: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP36]], %[[COND_TRUE]] ], [ [[TMP37]], %[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] // CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] // CHECK2: [[COND_TRUE30]]: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 // CHECK2-NEXT: br label %[[COND_END32:.*]] // CHECK2: [[COND_FALSE31]]: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 // CHECK2-NEXT: br label %[[COND_END32]] // CHECK2: [[COND_END32]]: -// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP40]], %[[COND_TRUE30]] ], [ [[TMP41]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] // CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 // CHECK2-NEXT: br label %[[FOR_COND:.*]] // CHECK2: [[FOR_COND]]: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 -// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP42]], [[TMP43]] +// 
CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] // CHECK2: [[FOR_BODY]]: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 -// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] // CHECK2: [[IF_THEN]]: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP47]], [[TMP48]] -// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP46]], [[MUL]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP50]], [[TMP51]] -// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP49]], [[MUL37]] +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] // CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]]) // CHECK2-NEXT: br label %[[IF_END]] // CHECK2: [[IF_END]]: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 -// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] // CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] // CHECK2: [[IF_THEN40]]: -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 -// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP56]], [[TMP57]] -// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP55]], [[MUL41]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] // CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 -// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP59]], [[TMP60]] -// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP58]], [[MUL43]] +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] // CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]]) +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]]) // CHECK2-NEXT: br label %[[IF_END45]] // CHECK2: [[IF_END45]]: -// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 -// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] // CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] // CHECK2: [[IF_THEN47]]: -// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 -// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 -// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP65]], [[TMP66]] -// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP64]], [[MUL48]] +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = 
load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] // CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 -// CHECK2-NEXT: [[TMP69:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 -// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP68]], [[TMP69]] -// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP67]], [[MUL50]] +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] // CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 -// CHECK2-NEXT: [[TMP70:%.*]] = load i32, ptr [[K]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP70]]) +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP67]]) // CHECK2-NEXT: br label %[[IF_END52]] // CHECK2: [[IF_END52]]: // CHECK2-NEXT: br label %[[FOR_INC:.*]] // CHECK2: [[FOR_INC]]: -// CHECK2-NEXT: [[TMP71:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP71]], 1 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 -// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: [[FOR_END]]: // CHECK2-NEXT: ret void // @@ -1819,6 +2314,8 @@ extern "C" void foo4() { // CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} // CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} // CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} //. // CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} // CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} @@ -1826,4 +2323,6 @@ extern "C" void foo4() { // CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} // CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} // CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} //. 
>From 860fcd94d930c9644b4d0427471f2873e7afcf8b Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:44:48 +0000 Subject: [PATCH 05/13] Fixed missing diagnostic groups in warnings --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 002aa7a774fbe..e85cd32d78b5c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11613,7 +11613,8 @@ def note_omp_implicit_dsa : Note< def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; def warn_omp_different_loop_ind_var_types : Warning < - "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">; + "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">, + InGroup; def err_omp_not_canonical_loop : Error < "loop after '#pragma omp %0' is not in canonical form">; def err_omp_not_a_loop_sequence : Error < @@ -11624,7 +11625,8 @@ def err_omp_invalid_looprange : Error < "loop range in '#pragma omp %0' exceeds the number of available loops: " "range end '%1' is greater than the total number of loops '%2'">; def warn_omp_redundant_fusion : Warning < - "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">; + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, + InGroup; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; >From 65cbfeb945e6b8016696906db43dd590adb285b2 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:49:50 +0000 Subject: [PATCH 06/13] Fixed formatting and comments --- 
clang/lib/Sema/SemaOpenMP.cpp | 112 ++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 54 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 3ce256f3ec23b..2985b256cf153 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14197,42 +14197,43 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( } // Overloaded base case function -template -static bool tryHandleAs(T *t, F &&) { - return false; +template static bool tryHandleAs(T *t, F &&) { + return false; } /** - * Tries to recursively cast `t` to one of the given types and invokes `f` if successful. + * Tries to recursively cast `t` to one of the given types and invokes `f` if + * successful. * * @tparam Class The first type to check. * @tparam Rest The remaining types to check. * @tparam T The base type of `t`. - * @tparam F The callable type for the function to invoke upon a successful cast. + * @tparam F The callable type for the function to invoke upon a successful + * cast. * @param t The object to be checked. * @param f The function to invoke if `t` matches `Class`. * @return `true` if `t` matched any type and `f` was called, otherwise `false`. */ template static bool tryHandleAs(T *t, F &&f) { - if (Class *c = dyn_cast(t)) { - f(c); - return true; - } else { - return tryHandleAs(t, std::forward(f)); - } + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } else { + return tryHandleAs(t, std::forward(f)); + } } // Updates OriginalInits by checking Transform against loop transformation // directives and appending their pre-inits if a match is found. 
static void updatePreInits(OMPLoopBasedDirective *Transform, SmallVectorImpl> &PreInits) { - if (!tryHandleAs( - Transform, [&PreInits](auto *Dir) { - appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); - })) - llvm_unreachable("Unhandled loop transformation"); + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); } bool SemaOpenMP::checkTransformableLoopNest( @@ -14310,43 +14311,42 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { unsigned getNestedLoopCount() const { return NestedLoopCount; } bool VisitForStmt(ForStmt *FS) override { - ++NestedLoopCount; - return true; + ++NestedLoopCount; + return true; } bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { - ++NestedLoopCount; - return true; + ++NestedLoopCount; + return true; } bool TraverseStmt(Stmt *S) override { - if (!S) + if (!S) return true; - // Skip traversal of all expressions, including special cases like - // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions - // may contain inner statements (and even loops), but they are not part - // of the syntactic body of the surrounding loop structure. - // Therefore must not be counted - if (isa(S)) + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. 
+ // Therefore must not be counted + if (isa(S)) return true; - // Only recurse into CompoundStmt (block {}) and loop bodies - if (isa(S) || isa(S) || - isa(S)) { + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S) || isa(S) || isa(S)) { return DynamicRecursiveASTVisitor::TraverseStmt(S); - } + } - // Stop traversal of the rest of statements, that break perfect - // loop nesting, such as control flow (IfStmt, SwitchStmt...) - return true; + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; } bool TraverseDecl(Decl *D) override { - // Stop in the case of finding a declaration, it is not important - // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, - // FunctionDecl...) - return true; + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; } }; @@ -14504,15 +14504,14 @@ bool SemaOpenMP::analyzeLoopSequence( return isa(Child); }; - // High level grammar validation for (auto *Child : LoopSeqStmt->children()) { - if (!Child) + if (!Child) continue; - // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { + // Skip over non-loop-sequence statements + if (!isLoopSequenceDerivation(Child)) { Child = Child->IgnoreContainers(); // Ignore empty compound statement @@ -14530,9 +14529,9 @@ bool SemaOpenMP::analyzeLoopSequence( // Already been treated, skip this children continue; } - } - // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { + } + // Regular loop sequence handling + if (isLoopSequenceDerivation(Child)) { if (isLoopGeneratingStmt(Child)) { if (!analyzeLoopGeneration(Child)) { return false; @@ -14546,12 +14545,12 @@ bool SemaOpenMP::analyzeLoopSequence( // Update the Loop Sequence size by one ++LoopSeqSize; } - } else { + } else { // Report error for invalid 
statement inside canonical loop sequence Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; - } + } } return true; } @@ -14568,9 +14567,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Checks whether the given statement is a compound statement if (!isa(AStmt)) { - Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) - << getOpenMPDirectiveName(Kind); - return false; + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; } // Number of top level canonical loop nests observed (And acts as index) LoopSeqSize = 0; @@ -14601,7 +14600,7 @@ bool SemaOpenMP::checkTransformableLoopSequence( OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind)) { - return false; + return false; } if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) @@ -15315,7 +15314,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *LoopStmt = nullptr; collectLoopStmts(AStmt, {LoopStmt}); - // Determine the PreInit declarations.e + // Determine the PreInit declarations. SmallVector PreInits; addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); @@ -15931,13 +15930,18 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CountVal = CountInt.getZExtValue(); }; - // Checks if the loop range is valid + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, unsigned NumLoops) -> bool { return FirstVal + CountVal - 1 <= NumLoops; }; uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + // Validates the loop range after evaluating the semantic information + // and ensures that the range is valid for the given loop sequence size. 
+ // Expressions are evaluated at compile time to obtain constant values. if (LRC) { EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, CountVal); >From b0fb1b3e26f1d9ceaac4495dcfad84f54f96d2a2 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 9 May 2025 10:58:54 +0000 Subject: [PATCH 07/13] Added minimal changes to enable flang future implementation --- flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 9 +++++++++ flang/lib/Lower/OpenMP/Clauses.cpp | 5 +++++ flang/lib/Lower/OpenMP/Clauses.h | 1 + flang/lib/Parser/openmp-parsers.cpp | 7 +++++++ flang/lib/Parser/unparse.cpp | 7 +++++++ flang/lib/Semantics/check-omp-structure.cpp | 9 +++++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + 8 files changed, 40 insertions(+) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index e3eed6aed8079..76aa3f7b90156 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -600,6 +600,7 @@ class ParseTreeDumper { NODE(OmpLinearClause, Modifier) NODE(parser, OmpLinearModifier) NODE_ENUM(OmpLinearModifier, Value) + NODE(parser, OmpLoopRangeClause) NODE(parser, OmpStepComplexModifier) NODE(parser, OmpStepSimpleModifier) NODE(parser, OmpLoopDirective) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 61f97b855b0e5..d32db62db2628 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4367,6 +4367,15 @@ struct OmpLinearClause { std::tuple t; }; +// Ref: [6.0:207-208] +// +// loop-range-clause -> +// LOOPRANGE(first, count) // since 6.0 +struct OmpLoopRangeClause { + TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause); + std::tuple t; +}; + // Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] // // map-clause -> diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 
b599d69a36272..a38249bf2b588 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -997,6 +997,11 @@ Link make(const parser::OmpClause::Link &inp, return Link{/*List=*/makeObjects(inp.v, semaCtx)}; } +LoopRange make(const parser::OmpClause::Looprange &inp, + semantics::SemanticsContext &semaCtx) { + llvm_unreachable("Unimplemented: looprange"); +} + Map make(const parser::OmpClause::Map &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpMapClause diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index d7ab21d428e32..bda8571e65f23 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT; using InReduction = tomp::clause::InReductionT; using IsDevicePtr = tomp::clause::IsDevicePtrT; using Lastprivate = tomp::clause::LastprivateT; +using LoopRange = tomp::clause::LoopRangeT; using Linear = tomp::clause::LinearT; using Link = tomp::clause::LinkT; using Map = tomp::clause::MapT; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index c55642d969503..d53389746dbec 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -847,6 +847,11 @@ TYPE_PARSER( maybe(":"_tok >> nonemptyList(Parser{})), /*PostModified=*/pure(true))) +TYPE_PARSER( + construct(scalarIntConstantExpr, + "," >> scalarIntConstantExpr) +) + // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1020,6 +1025,8 @@ TYPE_PARSER( // parenthesized(Parser{}))) || "LINK" >> construct(construct( parenthesized(Parser{}))) || + "LOOPRANGE" >> construct(construct( + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ed0f227fd5b98..18e8a63ca68aa 100644 --- 
a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2318,6 +2318,13 @@ class UnparseVisitor { } } } + void Unparse(const OmpLoopRangeClause &x) { + Word("LOOPRANGE("); + Walk(std::get<0>(x.t)); + Put(", "); + Walk(std::get<1>(x.t)); + Put(")"); + } void Unparse(const OmpReductionClause &x) { using Modifier = OmpReductionClause::Modifier; Walk(std::get>>(x.t), ": "); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 68cea6739830d..2a03c6a1fd0e4 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4446,6 +4446,15 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) +void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { + context_.Say(GetContext().clauseSource, + "LOOPRANGE clause is not implemented yet"_err_en_US, + ContextDirectiveAsFortran()); +} + +void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) { + context_.Say(GetContext().clauseSource, + "FREE_AGENT clause is not implemented yet"_err_en_US, // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 989b35a7caa2a..f8acdc62aba3d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -276,6 +276,7 @@ def OMPC_Link : Clause<[Spelling<"link">]> { } def OMPC_LoopRange : Clause<[Spelling<"looprange">]> { let clangClass = "OMPLoopRangeClause"; + let flangClass = "OmpLoopRangeClause"; } def OMPC_Map : Clause<[Spelling<"map">]> { let clangClass = "OMPMapClause"; >From b252aa910ef7c5c278a86bd7195bbf3bb18dd18d Mon Sep 17 00:00:00 2001 From: eZWALT Date: Wed, 21 May 2025 13:14:22 +0000 Subject: [PATCH 08/13] Address basic PR feedback --- clang/include/clang/AST/OpenMPClause.h | 93 ++++---- clang/include/clang/AST/StmtOpenMP.h | 2 +- clang/include/clang/Sema/SemaOpenMP.h | 14 +- clang/lib/AST/OpenMPClause.cpp | 17 +- clang/lib/CodeGen/CGExpr.cpp | 5 +- clang/lib/CodeGen/CodeGenFunction.h | 4 - clang/lib/Sema/SemaOpenMP.cpp | 224 +++++++++----------- flang/lib/Semantics/check-omp-structure.cpp | 3 - 8 files changed, 166 insertions(+), 196 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 8f937cdef9cd0..3df5133a17fb4 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1153,82 +1153,73 @@ class OMPFullClause final : public OMPNoChildClause { /// for(int j = 0; j < 256; j+=2) /// for(int k = 127; k >= 0; --k) /// \endcode -class OMPLoopRangeClause final : public OMPClause { +class OMPLoopRangeClause final + : public OMPClause, + private llvm::TrailingObjects { friend class OMPClauseReader; - - explicit OMPLoopRangeClause() - : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + friend class llvm::TrailingObjects; /// Location of '(' SourceLocation LParenLoc; - /// Location of 'first' - SourceLocation FirstLoc; - - /// Location of 'count' - SourceLocation CountLoc; - - /// Expr associated with 'first' argument - Expr 
*First = nullptr; - - /// Expr associated with 'count' argument - Expr *Count = nullptr; - - /// Set 'first' - void setFirst(Expr *First) { this->First = First; } + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; - /// Set 'count' - void setCount(Expr *Count) { this->Count = Count; } + /// Number of looprange arguments (always 2: first, count) + unsigned NumArgs = 2; - /// Set location of '('. - void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } - - /// Set location of 'first' argument - void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + /// Set the argument expressions. + void setArgs(ArrayRef Args) { + assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments"); + std::copy(Args.begin(), Args.end(), getTrailingObjects()); + } - /// Set location of 'count' argument - void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {} public: - /// Build an AST node for a 'looprange' clause - /// - /// \param StartLoc Starting location of the clause. - /// \param LParenLoc Location of '('. - /// \param ModifierLoc Modifier location. - /// \param + /// Build a 'looprange' clause AST node. static OMPLoopRangeClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, - SourceLocation EndLoc, Expr *First, Expr *Count); + SourceLocation EndLoc, ArrayRef Args); - /// Build an empty 'looprange' node for deserialization - /// - /// \param C Context of the AST. + /// Build an empty 'looprange' clause node. 
static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); - /// Returns the location of '(' + // Location getters/setters SourceLocation getLParenLoc() const { return LParenLoc; } - - /// Returns the location of 'first' SourceLocation getFirstLoc() const { return FirstLoc; } - - /// Returns the location of 'count' SourceLocation getCountLoc() const { return CountLoc; } - /// Returns the argument 'first' or nullptr if not set - Expr *getFirst() const { return cast_or_null(First); } + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } - /// Returns the argument 'count' or nullptr if not set - Expr *getCount() const { return cast_or_null(Count); } + /// Get looprange arguments: first and count + Expr *getFirst() const { return getArgs()[0]; } + Expr *getCount() const { return getArgs()[1]; } - child_range children() { - return child_range(reinterpret_cast(&First), - reinterpret_cast(&Count) + 1); + /// Set looprange arguments: first and count + void setFirst(Expr *E) { getArgs()[0] = E; } + void setCount(Expr *E) { getArgs()[1] = E; } + + MutableArrayRef getArgs() { + return MutableArrayRef(getTrailingObjects(), NumArgs); + } + ArrayRef getArgs() const { + return ArrayRef(getTrailingObjects(), NumArgs); } + child_range children() { + return child_range(reinterpret_cast(getArgs().begin()), + reinterpret_cast(getArgs().end())); + } const_child_range children() const { - auto Children = const_cast(this)->children(); - return const_child_range(Children.begin(), Children.end()); + auto AR = getArgs(); + return const_child_range(reinterpret_cast(AR.begin()), + reinterpret_cast(AR.end())); } child_range used_children() { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 6425f6616a558..0421c06245cac 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5883,7 
+5883,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { EndLoc, NumLoops) { // Interchange produces a single top-level canonical loop // nest, with the exact same amount of total loops - setNumGeneratedLoops(NumLoops); + setNumGeneratedLoops(3 * NumLoops); setNumGeneratedLoopNests(1); } diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 547ea95c6cd5d..f848c4a7d715e 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1492,7 +1492,7 @@ class SemaOpenMP : public SemaBase { bool checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits); + Stmt *&Body, SmallVectorImpl> &OriginalInits); /// @brief Categories of loops encountered during semantic OpenMP loop /// analysis @@ -1555,9 +1555,9 @@ class SemaOpenMP : public SemaBase { Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context, OpenMPDirectiveKind Kind); @@ -1591,9 +1591,9 @@ class SemaOpenMP : public SemaBase { unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 
0b5808eb100e4..e0570262b2a05 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1026,22 +1026,25 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { OMPLoopRangeClause * OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc, - SourceLocation FirstLoc, SourceLocation CountLoc, - Expr *First, Expr *Count) { + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + ArrayRef Args) { + + assert(Args.size() == 2 && + "looprange clause must have exactly two arguments"); OMPLoopRangeClause *Clause = CreateEmpty(C); Clause->setLocStart(StartLoc); Clause->setLParenLoc(LParenLoc); - Clause->setLocEnd(EndLoc); Clause->setFirstLoc(FirstLoc); Clause->setCountLoc(CountLoc); - Clause->setFirst(First); - Clause->setCount(Count); + Clause->setLocEnd(EndLoc); + Clause->setArgs(Args); return Clause; } OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { - return new (C) OMPLoopRangeClause(); + void *Mem = C.Allocate(totalSizeToAlloc(2)); + return new (Mem) OMPLoopRangeClause(); } OMPAllocateClause *OMPAllocateClause::Create( diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 08049d4d4e37d..f983b88eb61ec 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3256,11 +3256,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); // No other cases for now. - } else { - llvm::dbgs() << "THE DAMN DECLREFEXPR HASN'T BEEN ENTERED IN LOCALDECLMAP\n"; - VD->dumpColor(); + } else llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); - } // Handle threadlocal function locals. 
if (VD->getTLSKind() != VarDecl::TLS_None) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index bfe24213ed377..fe753e5b688b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -5363,10 +5363,6 @@ class CodeGenFunction : public CodeGenTypeCache { /// Set the address of a local variable. void setAddrOfLocalVar(const VarDecl *VD, Address Addr) { - if (LocalDeclMap.count(VD)) { - llvm::errs() << "Warning: VarDecl already exists in map: "; - VD->dumpColor(); - } assert(!LocalDeclMap.count(VD) && "Decl already exists in LocalDeclMap!"); LocalDeclMap.insert({VD, Addr}); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 2985b256cf153..9819dcfe60360 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14196,38 +14196,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } -// Overloaded base case function +/// Overloaded base case function template static bool tryHandleAs(T *t, F &&) { return false; } -/** - * Tries to recursively cast `t` to one of the given types and invokes `f` if - * successful. - * - * @tparam Class The first type to check. - * @tparam Rest The remaining types to check. - * @tparam T The base type of `t`. - * @tparam F The callable type for the function to invoke upon a successful - * cast. - * @param t The object to be checked. - * @param f The function to invoke if `t` matches `Class`. - * @return `true` if `t` matched any type and `f` was called, otherwise `false`. - */ +/// +/// Tries to recursively cast `t` to one of the given types and invokes `f` if +/// successful. +/// +/// @tparam Class The first type to check. +/// @tparam Rest The remaining types to check. +/// @tparam T The base type of `t`. +/// @tparam F The callable type for the function to invoke upon a successful +/// cast. 
+/// @param t The object to be checked. +/// @param f The function to invoke if `t` matches `Class`. +/// @return `true` if `t` matched any type and `f` was called, otherwise +/// `false`. template static bool tryHandleAs(T *t, F &&f) { if (Class *c = dyn_cast(t)) { f(c); return true; - } else { - return tryHandleAs(t, std::forward(f)); } + return tryHandleAs(t, std::forward(f)); } -// Updates OriginalInits by checking Transform against loop transformation -// directives and appending their pre-inits if a match is found. +/// Updates OriginalInits by checking Transform against loop transformation +/// directives and appending their pre-inits if a match is found. static void updatePreInits(OMPLoopBasedDirective *Transform, - SmallVectorImpl> &PreInits) { + SmallVectorImpl> &PreInits) { if (!tryHandleAs( Transform, [&PreInits](auto *Dir) { @@ -14239,7 +14238,7 @@ static void updatePreInits(OMPLoopBasedDirective *Transform, bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits) { + Stmt *&Body, SmallVectorImpl> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -14273,40 +14272,40 @@ bool SemaOpenMP::checkTransformableLoopNest( return Result; } -// Counts the total number of nested loops, including the outermost loop (the -// original loop). PRECONDITION of this visitor is that it must be invoked from -// the original loop to be analyzed. The traversal is stop for Decl's and -// Expr's given that they may contain inner loops that must not be counted. 
-// -// Example AST structure for the code: -// -// int main() { -// #pragma omp fuse -// { -// for (int i = 0; i < 100; i++) { <-- Outer loop -// []() { -// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP -// }; -// for(int j = 0; j < 5; ++j) {} <-- Inner loop -// } -// for (int r = 0; i < 100; i++) { <-- Outer loop -// struct LocalClass { -// void bar() { -// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP -// } -// }; -// for(int k = 0; k < 10; ++k) {} <-- Inner loop -// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP -// } -// } -// } -// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +/// Counts the total number of nested loops, including the outermost loop (the +/// original loop). PRECONDITION of this visitor is that it must be invoked from +/// the original loop to be analyzed. The traversal is stop for Decl's and +/// Expr's given that they may contain inner loops that must not be counted. +/// +/// Example AST structure for the code: +/// +/// int main() { +/// #pragma omp fuse +/// { +/// for (int i = 0; i < 100; i++) { <-- Outer loop +/// []() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// }; +/// for(int j = 0; j < 5; ++j) {} <-- Inner loop +/// } +/// for (int r = 0; i < 100; i++) { <-- Outer loop +/// struct LocalClass { +/// void bar() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// } +/// }; +/// for(int k = 0; k < 10; ++k) {} <-- Inner loop +/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +/// } +/// } +/// } +/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { private: unsigned NestedLoopCount = 0; public: - explicit NestedLoopCounterVisitor() {} + explicit NestedLoopCounterVisitor() = default; unsigned getNestedLoopCount() const { return NestedLoopCount; } @@ -14333,7 +14332,7 @@ class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { return true; // Only recurse into CompoundStmt 
(block {}) and loop bodies - if (isa(S) || isa(S) || isa(S)) { + if (isa(S)) { return DynamicRecursiveASTVisitor::TraverseStmt(S); } @@ -14354,19 +14353,18 @@ bool SemaOpenMP::analyzeLoopSequence( Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context, OpenMPDirectiveKind Kind) { VarsWithInheritedDSAType TmpDSA; QualType BaseInductionVarType; - // Helper Lambda to handle storing initialization and body statements for both - // ForStmt and CXXForRangeStmt and checks for any possible mismatch between - // induction variables types - auto storeLoopStatements = [&OriginalInits, &ForStmts, &BaseInductionVarType, - this, &Context](Stmt *LoopStmt) { + /// Helper Lambda to handle storing initialization and body statements for + /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch + /// between induction variables types + auto StoreLoopStatements = [&](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { OriginalInits.back().push_back(For->getInit()); ForStmts.push_back(For); @@ -14394,16 +14392,11 @@ bool SemaOpenMP::analyzeLoopSequence( } }; - // Helper lambda functions to encapsulate the processing of different - // derivations of the canonical loop sequence grammar - // - // Modularized code for handling loop generation and transformations - auto analyzeLoopGeneration = [&storeLoopStatements, &LoopHelpers, - &OriginalInits, &TransformsPreInits, - &LoopCategories, &LoopSeqSize, &NumLoops, Kind, - &TmpDSA, &ForStmts, &Context, - &LoopSequencePreInits, this](Stmt *Child) { - auto LoopTransform = dyn_cast(Child); + /// Helper lambda functions to encapsulate the processing of different + /// derivations of 
the canonical loop sequence grammar + /// Modularized code for handling loop generation and transformations + auto AnalyzeLoopGeneration = [&](Stmt *Child) { + auto *LoopTransform = dyn_cast(Child); Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); @@ -14414,9 +14407,8 @@ bool SemaOpenMP::analyzeLoopSequence( LoopSeqSize += NumGeneratedLoopNests; NumLoops += NumGeneratedLoops; return true; - } - // Unroll full (0 loops produced) - else { + } else { + // Unroll full (0 loops produced) Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; @@ -14443,9 +14435,8 @@ bool SemaOpenMP::analyzeLoopSequence( LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind); - } - // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) - else { + } else { + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) // Process the transformed loop statement OriginalInits.emplace_back(); TransformsPreInits.emplace_back(); @@ -14461,7 +14452,7 @@ bool SemaOpenMP::analyzeLoopSequence( << getOpenMPDirectiveName(Kind); return false; } - storeLoopStatements(TransformedStmt); + StoreLoopStatements(TransformedStmt); updatePreInits(LoopTransform, TransformsPreInits); NumLoops += NumGeneratedLoops; @@ -14470,10 +14461,8 @@ bool SemaOpenMP::analyzeLoopSequence( } }; - // Modularized code for handling regular canonical loops - auto analyzeRegularLoop = [&storeLoopStatements, &LoopHelpers, &OriginalInits, - &LoopSeqSize, &NumLoops, Kind, &TmpDSA, - &LoopCategories, this](Stmt *Child) { + /// Modularized code for handling regular canonical loops + auto AnalyzeRegularLoop = [&](Stmt *Child) { OriginalInits.emplace_back(); LoopHelpers.emplace_back(); LoopCategories.push_back(OMPLoopCategory::RegularLoop); @@ -14488,19 
+14477,19 @@ bool SemaOpenMP::analyzeLoopSequence( return false; } - storeLoopStatements(Child); + StoreLoopStatements(Child); auto NLCV = NestedLoopCounterVisitor(); NLCV.TraverseStmt(Child); NumLoops += NLCV.getNestedLoopCount(); return true; }; - // Helper functions to validate canonical loop sequence grammar is valid - auto isLoopSequenceDerivation = [](auto *Child) { - return isa(Child) || isa(Child) || - isa(Child); + /// Helper functions to validate loop sequence grammar derivations + auto IsLoopSequenceDerivation = [](auto *Child) { + return isa(Child); }; - auto isLoopGeneratingStmt = [](auto *Child) { + /// Helper functions to validate loop generating grammar derivations + auto IsLoopGeneratingStmt = [](auto *Child) { return isa(Child); }; @@ -14511,7 +14500,7 @@ bool SemaOpenMP::analyzeLoopSequence( continue; // Skip over non-loop-sequence statements - if (!isLoopSequenceDerivation(Child)) { + if (!IsLoopSequenceDerivation(Child)) { Child = Child->IgnoreContainers(); // Ignore empty compound statement @@ -14531,17 +14520,17 @@ bool SemaOpenMP::analyzeLoopSequence( } } // Regular loop sequence handling - if (isLoopSequenceDerivation(Child)) { - if (isLoopGeneratingStmt(Child)) { - if (!analyzeLoopGeneration(Child)) { + if (IsLoopSequenceDerivation(Child)) { + if (IsLoopGeneratingStmt(Child)) { + if (!AnalyzeLoopGeneration(Child)) return false; - } - // analyzeLoopGeneration updates Loop Sequence size accordingly + + // AnalyzeLoopGeneration updates Loop Sequence size accordingly } else { - if (!analyzeRegularLoop(Child)) { + if (!AnalyzeRegularLoop(Child)) return false; - } + // Update the Loop Sequence size by one ++LoopSeqSize; } @@ -14560,9 +14549,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( unsigned &NumLoops, SmallVectorImpl &LoopHelpers, SmallVectorImpl &ForStmts, - SmallVectorImpl> &OriginalInits, - SmallVectorImpl> &TransformsPreInits, - SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> 
&TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, SmallVectorImpl &LoopCategories, ASTContext &Context) { // Checks whether the given statement is a compound statement @@ -14598,10 +14587,9 @@ bool SemaOpenMP::checkTransformableLoopSequence( // Recursive entry point to process the main loop sequence if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, - LoopSequencePreInits, LoopCategories, Context, - Kind)) { + LoopSequencePreInits, LoopCategories, Context, Kind)) return false; - } + if (LoopSeqSize <= 0) { Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) << getOpenMPDirectiveName(Kind); @@ -14693,7 +14681,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14970,7 +14958,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef Clauses, // Verify and diagnose loop nest. 
SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15231,7 +15219,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15499,7 +15487,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15691,7 +15679,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Verify and diagnose loop nest. 
SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 2> OriginalInits; + SmallVector, 2> OriginalInits; if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15878,9 +15866,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, CaptureVars CopyTransformer(SemaRef); // Ensure the structured block is not empty - if (!AStmt) { + if (!AStmt) return StmtError(); - } unsigned NumLoops = 1; unsigned LoopSeqSize = 1; @@ -15899,16 +15886,15 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops SmallVector LoopHelpers; SmallVector LoopStmts; - SmallVector> OriginalInits; - SmallVector> TransformsPreInits; - SmallVector> LoopSequencePreInits; + SmallVector> OriginalInits; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; SmallVector LoopCategories; if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, LoopHelpers, LoopStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, - LoopCategories, Context)) { + LoopCategories, Context)) return StmtError(); - } // Handle clauses, which can be any of the following: [looprange, apply] const OMPLoopRangeClause *LRC = @@ -15998,9 +15984,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // expressions. Generates both the variable declaration and the corresponding // initialization statement. 
auto CreateHelperVarAndStmt = - [&SemaRef = this->SemaRef, &Context, &CopyTransformer, - &IVType](Expr *ExprToCopy, const std::string &BaseName, unsigned I, - bool NeedsNewVD = false) { + [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, + unsigned I, bool NeedsNewVD = false) { Expr *TransformedExpr = AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); if (!TransformedExpr) @@ -16044,9 +16029,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Transformations that apply this concept: Loopranged Fuse, Split if (!LoopSequencePreInits.empty()) { for (const auto <PreInits : LoopSequencePreInits) { - if (!LTPreInits.empty()) { + if (!LTPreInits.empty()) llvm::append_range(PreInits, LTPreInits); - } } } @@ -16075,9 +16059,9 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Order matters: pre-inits may define variables used in the original // inits such as upper bounds... auto TransformPreInit = TransformsPreInits[TransformIndex++]; - if (!TransformPreInit.empty()) { + if (!TransformPreInit.empty()) llvm::append_range(PreInits, TransformPreInit); - } + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], PreInits); } @@ -17496,13 +17480,15 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( if (CountVal.isInvalid()) Count = nullptr; + SmallVector ArgsVec = {First, Count}; + // OpenMP [6.0, Restrictions] // first + count - 1 must not evaluate to a value greater than the // loop sequence length of the associated canonical loop sequence. 
// This check must be performed afterwards due to the delayed // parsing and computation of the associated loop sequence return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, - FirstLoc, CountLoc, EndLoc, First, Count); + FirstLoc, CountLoc, EndLoc, ArgsVec); } OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2a03c6a1fd0e4..ac4883c4f2a18 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4452,9 +4452,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { ContextDirectiveAsFortran()); } -void OmpStructureChecker::Enter(const parser::OmpClause::FreeAgent &x) { - context_.Say(GetContext().clauseSource, - "FREE_AGENT clause is not implemented yet"_err_en_US, // Restrictions specific to each clause are implemented apart from the // generalized restrictions. 
>From e294777879dd46c1859a03c307e70dd03abe11b7 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Thu, 22 May 2025 10:39:39 +0000 Subject: [PATCH 09/13] Removed unncessary warning and updated tests accordingly --- .../clang/Basic/DiagnosticSemaKinds.td | 3 -- clang/lib/Sema/SemaOpenMP.cpp | 21 +-------- clang/test/OpenMP/fuse_messages.cpp | 43 +++++++++++++++---- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e85cd32d78b5c..2bd0f895204c9 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,9 +11612,6 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; -def warn_omp_different_loop_ind_var_types : Warning < - "loop sequence following '#pragma omp %0' contains induction variables of differing types: %1 and %2">, - InGroup; def err_omp_not_canonical_loop : Error < "loop after '#pragma omp %0' is not in canonical form">; def err_omp_not_a_loop_sequence : Error < diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 9819dcfe60360..5f36d968c68fa 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14360,31 +14360,12 @@ bool SemaOpenMP::analyzeLoopSequence( OpenMPDirectiveKind Kind) { VarsWithInheritedDSAType TmpDSA; - QualType BaseInductionVarType; /// Helper Lambda to handle storing initialization and body statements for - /// both ForStmt and CXXForRangeStmt and checks for any possible mismatch - /// between induction variables types + /// both ForStmt and CXXForRangeStmt auto StoreLoopStatements = [&](Stmt *LoopStmt) { if (auto *For = dyn_cast(LoopStmt)) { OriginalInits.back().push_back(For->getInit()); ForStmts.push_back(For); - // Extract induction variable - if (auto 
*InitStmt = dyn_cast_or_null(For->getInit())) { - if (auto *InitDecl = dyn_cast(InitStmt->getSingleDecl())) { - QualType InductionVarType = InitDecl->getType().getCanonicalType(); - - // Compare with first loop type - if (BaseInductionVarType.isNull()) { - BaseInductionVarType = InductionVarType; - } else if (!Context.hasSameType(BaseInductionVarType, - InductionVarType)) { - Diag(InitDecl->getBeginLoc(), - diag::warn_omp_different_loop_ind_var_types) - << getOpenMPDirectiveName(OMPD_fuse) << BaseInductionVarType - << InductionVarType; - } - } - } } else { auto *CXXFor = cast(LoopStmt); OriginalInits.back().push_back(CXXFor->getBeginStmt()); diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 2a2491d008a0b..4902d424373e5 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -70,15 +70,6 @@ void func() { for(int j = 0; j < 10; ++j); } - //expected-warning at +5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'unsigned int'}} - //expected-warning at +5 {{loop sequence following '#pragma omp fuse' contains induction variables of differing types: 'int' and 'long long'}} - #pragma omp fuse - { - for(int i = 0; i < 10; ++i); - for(unsigned int j = 0; j < 10; ++j); - for(long long k = 0; k < 100; ++k); - } - //expected-warning at +2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} #pragma omp fuse { @@ -123,6 +114,40 @@ void func() { for(int j = 0; j < 100; ++j); for(int k = 0; k < 50; ++k); } + + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}} + #pragma omp fuse looprange(1,6) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + // This fusion results in 2 loops + #pragma omp fuse looprange(1,2) + { + for(int i = 0; i < 10; ++i); + 
for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + } + + //expected-error at +1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(2,3) + { + #pragma omp unroll partial(2) + for(int i = 0; i < 10; ++i); + + #pragma omp reverse + for(int j = 0; j < 10; ++j); + + #pragma omp fuse + { + { + #pragma omp reverse + for(int j = 0; j < 10; ++j); + } + for(int k = 0; k < 50; ++k); + } + } } // In a template context, but expression itself not instantiation-dependent >From 1c8f0fe23fdd78de1512505f128fa9e8bff655f9 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 20 Jun 2025 14:17:29 +0000 Subject: [PATCH 10/13] Address formatting issues --- clang/include/clang/Parse/Parser.h | 2 +- clang/lib/AST/StmtOpenMP.cpp | 10 +++++----- clang/lib/Parse/ParseOpenMP.cpp | 2 +- flang/lib/Lower/OpenMP/Clauses.cpp | 2 +- flang/lib/Parser/openmp-parsers.cpp | 8 +++----- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 08bee0078b5ff..9364007f3cf41 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6737,7 +6737,7 @@ class Parser : public CodeCompletionHandler { /// Parses the 'looprange' clause of a '#pragma omp fuse' directive. OMPClause *ParseOpenMPLoopRangeClause(); - + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. 
OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index f527e6361b5e5..1f49e9f2a0640 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -522,15 +522,15 @@ OMPFuseDirective *OMPFuseDirective::Create( NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); - // The number of top level canonical nests could + // The number of top level canonical nests could // not match the total number of generated loops // Example: // Before fusion: - // for (int i = 0; i < N; ++i) - // for (int j = 0; j < M; ++j) + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) // A[i][j] = i + j; - // - // for (int k = 0; k < P; ++k) + // + // for (int k = 0; k < P; ++k) // B[k] = k * 2; // Here, NumLoopNests = 2, but NumLoops = 3. Dir->setNumGeneratedLoopNests(NumLoopNests); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 2d6d624c1ecc8..48d9c184131cd 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3520,7 +3520,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, break; case OMPC_looprange: Clause = ParseOpenMPLoopRangeClause(); - break; + break; default: break; } diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index a38249bf2b588..c94d56cb57756 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -998,7 +998,7 @@ Link make(const parser::OmpClause::Link &inp, } LoopRange make(const parser::OmpClause::Looprange &inp, - semantics::SemanticsContext &semaCtx) { + semantics::SemanticsContext &semaCtx) { llvm_unreachable("Unimplemented: looprange"); } diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index d53389746dbec..39978e402e63b 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -847,10 +847,8 @@ TYPE_PARSER( maybe(":"_tok >> 
nonemptyList(Parser{})), /*PostModified=*/pure(true))) -TYPE_PARSER( - construct(scalarIntConstantExpr, - "," >> scalarIntConstantExpr) -) +TYPE_PARSER(construct( + scalarIntConstantExpr, "," >> scalarIntConstantExpr)) // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1026,7 +1024,7 @@ TYPE_PARSER( // "LINK" >> construct(construct( parenthesized(Parser{}))) || "LOOPRANGE" >> construct(construct( - parenthesized(Parser{}))) || + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( >From 009d8630c7ff97dedc543df04d0b18ab4579a503 Mon Sep 17 00:00:00 2001 From: eZWALT Date: Fri, 20 Jun 2025 14:44:31 +0000 Subject: [PATCH 11/13] Address minor feedback part 2 --- clang/include/clang/AST/OpenMPClause.h | 8 ++++++-- clang/include/clang/AST/StmtOpenMP.h | 1 + clang/include/clang/Basic/DiagnosticSemaKinds.td | 10 +++++----- clang/lib/Sema/SemaOpenMP.cpp | 15 +++++---------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 3df5133a17fb4..478c41322f34a 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1197,12 +1197,16 @@ class OMPLoopRangeClause final void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } - /// Get looprange arguments: first and count + /// Get looprange 'first' expression Expr *getFirst() const { return getArgs()[0]; } + + /// Get looprange 'count' expression Expr *getCount() const { return getArgs()[1]; } - /// Set looprange arguments: first and count + /// Set looprange 'first' expression void setFirst(Expr *E) { getArgs()[0] = E; } + + /// Set looprange 'count' expression void setCount(Expr *E) { getArgs()[1] = E; } MutableArrayRef getArgs() { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 
0421c06245cac..5ec3677fc7507 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -976,6 +976,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop /// transformation void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2bd0f895204c9..d807b6b076724 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11612,16 +11612,16 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; -def err_omp_not_canonical_loop : Error < +def err_omp_not_canonical_loop : Error< "loop after '#pragma omp %0' is not in canonical form">; -def err_omp_not_a_loop_sequence : Error < +def err_omp_not_a_loop_sequence : Error< "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; -def err_omp_empty_loop_sequence : Error < +def err_omp_empty_loop_sequence : Error< "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; -def err_omp_invalid_looprange : Error < +def err_omp_invalid_looprange : Error< "loop range in '#pragma omp %0' exceeds the number of available loops: " "range end '%1' is greater than the total number of loops '%2'">; -def warn_omp_redundant_fusion : Warning < +def warn_omp_redundant_fusion : Warning< "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, InGroup; def err_omp_not_for : Error< diff 
--git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5f36d968c68fa..8aa21c5c01220 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,7 +22,6 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" -#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -48,7 +47,6 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include -#include using namespace clang; using namespace llvm::omp; @@ -14201,7 +14199,6 @@ template static bool tryHandleAs(T *t, F &&) { return false; } -/// /// Tries to recursively cast `t` to one of the given types and invokes `f` if /// successful. /// @@ -14274,7 +14271,7 @@ bool SemaOpenMP::checkTransformableLoopNest( /// Counts the total number of nested loops, including the outermost loop (the /// original loop). PRECONDITION of this visitor is that it must be invoked from -/// the original loop to be analyzed. The traversal is stop for Decl's and +/// the original loop to be analyzed. The traversal stops for Decl's and /// Expr's given that they may contain inner loops that must not be counted. 
/// /// Example AST structure for the code: @@ -15945,7 +15942,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Select the type with the largest bit width among all induction variables QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); - for (unsigned int I = FirstVal; I < LastVal; ++I) { + for (unsigned I = FirstVal; I < LastVal; ++I) { QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { IVType = CurrentIVType; @@ -16054,9 +16051,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, auto [IVVD, IVDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); - if (!LBVD || !STVD || !NIVD || !IVVD) - assert(LBVD && STVD && NIVD && IVVD && - "OpenMP Fuse Helper variables creation failed"); + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); UBVarDecls.push_back(UBVD); LBVarDecls.push_back(LBVD); @@ -16097,11 +16093,10 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // original.indexk = ivk // body(k); Expr *InitVal = IntegerLiteral::Create(Context, // llvm::APInt(IVWidth, 0), - // } // 1. 
Create the initialized fuse index - const std::string IndexName = Twine(".omp.fuse.index").str(); + StringRef IndexName = ".omp.fuse.index"; Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), IVType, SourceLocation()); VarDecl *IndexDecl = >From eb464451c30c19745591738d71e3496d4d11514a Mon Sep 17 00:00:00 2001 From: Roger Ferrer Ibanez Date: Mon, 14 Jul 2025 11:34:36 +0000 Subject: [PATCH 12/13] Address some of the feedback --- clang/include/clang/AST/OpenMPClause.h | 46 ++++--------- clang/include/clang/Sema/SemaOpenMP.h | 2 +- clang/lib/AST/OpenMPClause.cpp | 11 ++- clang/lib/Sema/SemaOpenMP.cpp | 92 ++++++++++++-------------- 4 files changed, 63 insertions(+), 88 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 478c41322f34a..5034ff9bacbfc 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1154,11 +1154,8 @@ class OMPFullClause final : public OMPNoChildClause { /// for(int k = 127; k >= 0; --k) /// \endcode class OMPLoopRangeClause final - : public OMPClause, - private llvm::TrailingObjects { + : public OMPClause { friend class OMPClauseReader; - friend class llvm::TrailingObjects; - /// Location of '(' SourceLocation LParenLoc; @@ -1166,24 +1163,25 @@ class OMPLoopRangeClause final SourceLocation FirstLoc, CountLoc; /// Number of looprange arguments (always 2: first, count) - unsigned NumArgs = 2; + static constexpr unsigned NumArgs = 2; + Stmt *Args[NumArgs] = {nullptr, nullptr}; - /// Set the argument expressions. - void setArgs(ArrayRef Args) { - assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments"); - std::copy(Args.begin(), Args.end(), getTrailingObjects()); - } + /// Set looprange 'first' expression + void setFirst(Expr *E) { Args[0] = E; } + + /// Set looprange 'count' expression + void setCount(Expr *E) { Args[1] = E; } /// Build an empty clause for deserialization. 
explicit OMPLoopRangeClause() - : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {} + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} public: /// Build a 'looprange' clause AST node. static OMPLoopRangeClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, - SourceLocation EndLoc, ArrayRef Args); + SourceLocation EndLoc, Expr* First, Expr* Count); /// Build an empty 'looprange' clause node. static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); @@ -1198,32 +1196,16 @@ class OMPLoopRangeClause final void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } /// Get looprange 'first' expression - Expr *getFirst() const { return getArgs()[0]; } + Expr *getFirst() const { return cast_or_null(Args[0]); } /// Get looprange 'count' expression - Expr *getCount() const { return getArgs()[1]; } - - /// Set looprange 'first' expression - void setFirst(Expr *E) { getArgs()[0] = E; } - - /// Set looprange 'count' expression - void setCount(Expr *E) { getArgs()[1] = E; } - - MutableArrayRef getArgs() { - return MutableArrayRef(getTrailingObjects(), NumArgs); - } - ArrayRef getArgs() const { - return ArrayRef(getTrailingObjects(), NumArgs); - } + Expr *getCount() const { return cast_or_null(Args[1]); } child_range children() { - return child_range(reinterpret_cast(getArgs().begin()), - reinterpret_cast(getArgs().end())); + return child_range(Args, Args + NumArgs); } const_child_range children() const { - auto AR = getArgs(); - return const_child_range(reinterpret_cast(AR.begin()), - reinterpret_cast(AR.end())); + return const_child_range(Args, Args + NumArgs); } child_range used_children() { diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index f848c4a7d715e..09c97dc2e119b 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1494,7 +1494,7 @@ class SemaOpenMP : public SemaBase { SmallVectorImpl 
&LoopHelpers, Stmt *&Body, SmallVectorImpl> &OriginalInits); - /// @brief Categories of loops encountered during semantic OpenMP loop + /// Categories of loops encountered during semantic OpenMP loop /// analysis /// /// This enumeration identifies the structural category of a loop or sequence diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index e0570262b2a05..4eb249948932e 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1028,23 +1028,20 @@ OMPLoopRangeClause * OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc, - ArrayRef Args) { - - assert(Args.size() == 2 && - "looprange clause must have exactly two arguments"); + Expr *First, Expr* Count) { OMPLoopRangeClause *Clause = CreateEmpty(C); Clause->setLocStart(StartLoc); Clause->setLParenLoc(LParenLoc); Clause->setFirstLoc(FirstLoc); Clause->setCountLoc(CountLoc); Clause->setLocEnd(EndLoc); - Clause->setArgs(Args); + Clause->setFirst(First); + Clause->setCount(Count); return Clause; } OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { - void *Mem = C.Allocate(totalSizeToAlloc(2)); - return new (Mem) OMPLoopRangeClause(); + return new (C) OMPLoopRangeClause(); } OMPAllocateClause *OMPAllocateClause::Create( diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 8aa21c5c01220..edffb041b3664 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14195,29 +14195,29 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( } /// Overloaded base case function -template static bool tryHandleAs(T *t, F &&) { +template static bool tryHandleAs(T *, F &&) { return false; } -/// Tries to recursively cast `t` to one of the given types and invokes `f` if -/// successful. 
+/// Tries to recursively cast `Type` to one of the given types and invokes +/// `Func` if successful. /// -/// @tparam Class The first type to check. -/// @tparam Rest The remaining types to check. -/// @tparam T The base type of `t`. -/// @tparam F The callable type for the function to invoke upon a successful +/// \tparam Class The first type to check. +/// \tparam Rest The remaining types to check. +/// \tparam T The base type of `Type`. +/// \tparam F The callable type for the function to invoke upon a successful /// cast. -/// @param t The object to be checked. -/// @param f The function to invoke if `t` matches `Class`. -/// @return `true` if `t` matched any type and `f` was called, otherwise +/// \param Type The object to be checked. +/// \param Func The function to invoke if `Type` matches `Class`. +/// \return `true` if `Type` matched any type and `Func` was called, otherwise /// `false`. template -static bool tryHandleAs(T *t, F &&f) { - if (Class *c = dyn_cast(t)) { - f(c); +static bool tryHandleAs(T *Type, F &&Func) { + if (Class *C = dyn_cast(Type)) { + Func(C); return true; } - return tryHandleAs(t, std::forward(f)); + return tryHandleAs(Type, std::forward(Func)); } /// Updates OriginalInits by checking Transform against loop transformation @@ -14297,7 +14297,7 @@ bool SemaOpenMP::checkTransformableLoopNest( /// } /// } /// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops -class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { +class NestedLoopCounterVisitor final : public DynamicRecursiveASTVisitor { private: unsigned NestedLoopCount = 0; @@ -14385,22 +14385,21 @@ bool SemaOpenMP::analyzeLoopSequence( LoopSeqSize += NumGeneratedLoopNests; NumLoops += NumGeneratedLoops; return true; - } else { - // Unroll full (0 loops produced) - Diag(Child->getBeginLoc(), diag::err_omp_not_for) - << 0 << getOpenMPDirectiveName(Kind); - return false; } + // Unroll full (0 loops produced) + Diag(Child->getBeginLoc(), 
diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; } // Handle loop transformations with multiple loop nests // Unroll full - if (NumGeneratedLoopNests <= 0) { + if (!NumGeneratedLoopNests) { Diag(Child->getBeginLoc(), diag::err_omp_not_for) << 0 << getOpenMPDirectiveName(Kind); return false; } // Loop transformatons such as split or loopranged fuse - else if (NumGeneratedLoopNests > 1) { + if (NumGeneratedLoopNests > 1) { // Get the preinits related to this loop sequence generating // loop transformation (i.e loopranged fuse, split...) LoopSequencePreInits.emplace_back(); @@ -14413,30 +14412,29 @@ bool SemaOpenMP::analyzeLoopSequence( LoopHelpers, ForStmts, OriginalInits, TransformsPreInits, LoopSequencePreInits, LoopCategories, Context, Kind); - } else { - // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) - // Process the transformed loop statement - OriginalInits.emplace_back(); - TransformsPreInits.emplace_back(); - LoopHelpers.emplace_back(); - LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); - - unsigned IsCanonical = - checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, - *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); - - if (!IsCanonical) { - Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) - << getOpenMPDirectiveName(Kind); - return false; - } - StoreLoopStatements(TransformedStmt); - updatePreInits(LoopTransform, TransformsPreInits); + } + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + // Process the transformed loop statement + OriginalInits.emplace_back(); + TransformsPreInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); - NumLoops += NumGeneratedLoops; - ++LoopSeqSize; - return true; + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, + *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + 
Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; } + StoreLoopStatements(TransformedStmt); + updatePreInits(LoopTransform, TransformsPreInits); + + NumLoops += NumGeneratedLoops; + ++LoopSeqSize; + return true; }; /// Modularized code for handling regular canonical loops @@ -16303,7 +16301,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Only TransformSingleLoop requires inserting pre-inits here if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { - auto TransformPreInit = TransformsPreInits[TransformIndex++]; + const auto &TransformPreInit = TransformsPreInits[TransformIndex++]; if (!TransformPreInit.empty()) { llvm::append_range(PreInits, TransformPreInit); } @@ -17456,15 +17454,13 @@ OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( if (CountVal.isInvalid()) Count = nullptr; - SmallVector ArgsVec = {First, Count}; - // OpenMP [6.0, Restrictions] // first + count - 1 must not evaluate to a value greater than the // loop sequence length of the associated canonical loop sequence. 
// This check must be performed afterwards due to the delayed // parsing and computation of the associated loop sequence return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, - FirstLoc, CountLoc, EndLoc, ArgsVec); + FirstLoc, CountLoc, EndLoc, First, Count); } OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, >From 010a57f0e86d2b989e746d286a93b18add2e50ef Mon Sep 17 00:00:00 2001 From: Roger Ferrer Ibanez Date: Mon, 14 Jul 2025 13:21:39 +0000 Subject: [PATCH 13/13] More changes addressing feedback --- .../clang/Basic/DiagnosticSemaKinds.td | 7 ++- clang/lib/Sema/SemaOpenMP.cpp | 6 +-- clang/test/OpenMP/fuse_messages.cpp | 51 +++++++++---------- .../fuse/parallel-wsloop-collapse-intfor.c | 2 +- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d807b6b076724..10b5349fa4d43 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11614,15 +11614,14 @@ def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; def err_omp_not_canonical_loop : Error< "loop after '#pragma omp %0' is not in canonical form">; -def err_omp_not_a_loop_sequence : Error< +def err_omp_not_a_loop_sequence : Error< "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; def err_omp_empty_loop_sequence : Error< "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; def err_omp_invalid_looprange : Error< - "loop range in '#pragma omp %0' exceeds the number of available loops: " - "range end '%1' is greater than the total number of loops '%2'">; + "looprange clause selects loops from %1 to %2 but this exceeds the number of loops (%3) in the loop sequence">; def 
warn_omp_redundant_fusion : Warning< - "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, + "looprange clause selects a single loop, resulting in redundant fusion">, InGroup; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index edffb041b3664..2f5a1b39294b6 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15913,8 +15913,8 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) { SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) - << getOpenMPDirectiveName(OMPD_fuse) << (FirstVal + CountVal - 1) - << LoopSeqSize; + << getOpenMPDirectiveName(OMPD_fuse) << FirstVal + << (FirstVal + CountVal - 1) << LoopSeqSize; return StmtError(); } @@ -16021,7 +16021,7 @@ StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, // Firstly we need to update TransformIndex to match the begining of the // looprange section - for (unsigned int I = 0; I < FirstVal - 1; ++I) { + for (unsigned I : llvm::seq(FirstVal - 1)) { if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) ++TransformIndex; } diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp index 4902d424373e5..5df904fc2b15e 100644 --- a/clang/test/OpenMP/fuse_messages.cpp +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -3,23 +3,23 @@ void func() { // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} - #pragma omp fuse + #pragma omp fuse ; // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}} - #pragma omp fuse + #pragma omp fuse {int bar = 0;} // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}} - #pragma omp fuse + #pragma omp fuse { for(int i = 0; i < 10; ++i);
int x = 2; } // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} - #pragma omp fuse - #pragma omp for + #pragma omp fuse + #pragma omp for for (int i = 0; i < 7; ++i) ; @@ -39,7 +39,7 @@ void func() { // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}} - #pragma omp fuse final(0) + #pragma omp fuse final(0) { for (int i = 0; i < 7; ++i) ; @@ -49,7 +49,7 @@ void func() { //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}} //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} - #pragma omp fuse + #pragma omp fuse { for(int i = 0; i < 10; i*=2) { ; @@ -58,25 +58,25 @@ void func() { } //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} - #pragma omp fuse + #pragma omp fuse {} //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}} - #pragma omp fuse + #pragma omp fuse { - #pragma omp unroll full + #pragma omp unroll full for(int i = 0; i < 10; ++i); - + for(int j = 0; j < 10; ++j); } - //expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + //expected-warning@+2 {{looprange clause selects a single loop, resulting in redundant fusion}} #pragma omp fuse { for(int i = 0; i < 10; ++i); } - //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}} #pragma omp fuse looprange(1, 1) { for(int i = 0; i < 10; ++i); @@ -99,7 +99,7 @@ void func() { const int x = 1; constexpr int y = 4; - //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than
the total number of loops '3'}} + //expected-error@+1 {{looprange clause selects loops from 1 to 4 but this exceeds the number of loops (3) in the loop sequence}} #pragma omp fuse looprange(x,y) { for(int i = 0; i < 10; ++i); @@ -107,7 +107,7 @@ void func() { for(int k = 0; k < 50; ++k); } - //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}} + //expected-error@+1 {{looprange clause selects loops from 1 to 420 but this exceeds the number of loops (3) in the loop sequence}} #pragma omp fuse looprange(1,420) { for(int i = 0; i < 10; ++i); @@ -115,7 +115,7 @@ void func() { for(int k = 0; k < 50; ++k); } - //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}} + //expected-error@+1 {{looprange clause selects loops from 1 to 6 but this exceeds the number of loops (5) in the loop sequence}} #pragma omp fuse looprange(1,6) { for(int i = 0; i < 10; ++i); @@ -130,21 +130,21 @@ void func() { } } - //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + //expected-error@+1 {{looprange clause selects loops from 2 to 4 but this exceeds the number of loops (3) in the loop sequence}} #pragma omp fuse looprange(2,3) { #pragma omp unroll partial(2) for(int i = 0; i < 10; ++i); - + #pragma omp reverse for(int j = 0; j < 10; ++j); - #pragma omp fuse + #pragma omp fuse { { #pragma omp reverse for(int j = 0; j < 10; ++j); - } + } for(int k = 0; k < 50; ++k); } } @@ -154,7 +154,7 @@ void func() { template static void templated_func() { - //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}} #pragma
omp fuse looprange(2,1) { for(int i = 0; i < 10; ++i); @@ -162,7 +162,7 @@ static void templated_func() { for(int k = 0; k < 50; ++k); } - //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}} + //expected-error@+1 {{looprange clause selects loops from 3 to 5 but this exceeds the number of loops (3) in the loop sequence}} #pragma omp fuse looprange(3,3) { for(int i = 0; i < 10; ++i); @@ -172,10 +172,10 @@ static void templated_func() { } -template +template static void templated_func_value_dependent() { - //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + //expected-warning@+1 {{looprange clause selects a single loop, resulting in redundant fusion}} #pragma omp fuse looprange(V,1) { for(int i = 0; i < 10; ++i); @@ -184,7 +184,7 @@ static void templated_func_value_dependent() { } } -template +template static void templated_func_type_dependent() { constexpr T s = 1; @@ -205,7 +205,6 @@ void template_inst() { templated_func_value_dependent<1>(); // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent' requested here}} templated_func_type_dependent(); - } diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c index 272908e72c429..9630fec50bc20 100644 --- a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c @@ -24,7 +24,7 @@ int main() { #endif /* HEADER */ // CHECK: do -// CHECK: i=0 j=0 +// CHECK-NEXT: i=0 j=0 // CHECK-NEXT: i=0 k=0 // CHECK-NEXT: i=0 j=1 // CHECK-NEXT: i=0 k=1 From openmp-commits at lists.llvm.org Mon Jul 14 07:24:57 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Mon, 14 Jul 2025 07:24:57
-0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751339.170a0220.765aa.54ac@mx.google.com> https://github.com/jplehr edited https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:24:57 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Mon, 14 Jul 2025 07:24:57 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751339.630a0220.38c6a9.6a8d@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) ---------------- jplehr wrote: Is this the right CMake var to key off from? https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:24:58 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Mon, 14 Jul 2025 07:24:58 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <6875133a.050a0220.151fda.6957@mx.google.com> ================ @@ -0,0 +1,29 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package. 
+## +################################################################################ + +# Compute installation prefix relative to this file. +get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) ---------------- jplehr wrote: Do we need this 6 times, or am I just not seeing the difference? https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:24:58 2025 From: openmp-commits at lists.llvm.org (Jan Patrick Lehr via Openmp-commits) Date: Mon, 14 Jul 2025 07:24:58 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <6875133a.a70a0220.2a8fc5.7af3@mx.google.com> https://github.com/jplehr commented: Just a few comments. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751750.050a0220.34f215.8489@mx.google.com> https://github.com/jprotze commented: Some comments for the first files https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751750.170a0220.cee89.8667@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) ---------------- jprotze wrote: Should probably default to libomp-standalone value. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:23 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:23 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <6875174f.170a0220.3446dd.5ee7@mx.google.com> https://github.com/jprotze edited https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751750.050a0220.263763.7eac@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) ---------------- jprotze wrote: Does this make sense here? Or should it be `LIBOMP_OMPT_SUPPORT`? 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751750.170a0220.1acab6.7677@mx.google.com> ================ @@ -0,0 +1,85 @@ +//===- OmptAliases.h - Shorthand aliases for OMPT enum values ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines shorthand aliases for OMPT enum values, providing improved +/// ease-of-use and readability. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H + +#include + +/// Aliases for enum: ompt_scope_endpoint_t +constexpr ompt_scope_endpoint_t BEGIN = ompt_scope_begin; +constexpr ompt_scope_endpoint_t END = ompt_scope_end; +constexpr ompt_scope_endpoint_t BEGINEND = ompt_scope_beginend; + +/// Aliases for enum: ompt_target_t +constexpr ompt_target_t TARGET = ompt_target; +constexpr ompt_target_t ENTER_DATA = ompt_target_enter_data; +constexpr ompt_target_t EXIT_DATA = ompt_target_exit_data; +constexpr ompt_target_t UPDATE = ompt_target_update; +constexpr ompt_target_t TARGET_NOWAIT = ompt_target_nowait; +constexpr ompt_target_t ENTER_DATA_NOWAIT = ompt_target_enter_data_nowait; +constexpr ompt_target_t EXIT_DATA_NOWAIT = ompt_target_exit_data_nowait; +constexpr ompt_target_t UPDATE_NOWAIT = ompt_target_update_nowait; + +/// Aliases for enum: ompt_target_data_op_t +constexpr ompt_target_data_op_t ALLOC = ompt_target_data_alloc; 
+constexpr ompt_target_data_op_t H2D = ompt_target_data_transfer_to_device; ---------------- jprotze wrote: I think, h2d and d2h were deprecated and deleted? https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751750.170a0220.264ab2.675a@mx.google.com> ================ @@ -0,0 +1,29 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package. +## +################################################################################ + +# Compute installation prefix relative to this file. +get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) ---------------- jprotze wrote: Duplicates? 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:26 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:26 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751752.170a0220.379e1e.6187@mx.google.com> ================ @@ -0,0 +1,331 @@ +//===- InternalEvent.h - Internal event representation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declares internal event representations along the default CTOR definition. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H + +#include "InternalEventCommon.h" + +#include +#include +#include + +#define expectedDefault(TypeName) std::numeric_limits::min() + +namespace omptest { + +namespace util { + +/// String manipulation helper function. Takes up to 8 bytes of data and returns +/// their hexadecimal representation as string. The data can be expanded to the +/// given size in bytes and will by default be prefixed with '0x'. +std::string makeHexString(uint64_t Data, bool IsPointer = true, + size_t DataBytes = 0, bool ShowHexBase = true); + +} // namespace util + +namespace internal { +// clang-format off +event_class_w_custom_body(AssertionSyncPoint, \ ---------------- jprotze wrote: I would prefer an expansion of these macros. Will be no fun debugging, if a random statement in the macro segfaults. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:42:26 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:42:26 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68751752.050a0220.1b247f.909e@mx.google.com> ================ @@ -0,0 +1,377 @@ +//===- OmptAssertEvent.h - Assertion event declarations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains assertion event constructors, for generally all observable events. +/// This includes user-generated events, like synchronization. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H + +#include "InternalEvent.h" +#include "omp-tools.h" + +#include +#include +#include +#include + +namespace omptest { + +enum class ObserveState { generated, always, never }; + +/// Helper function, returning an ObserveState string representation +const char *to_string(ObserveState State); + +/// Assertion event struct, provides statically callable CTORs. 
+struct OmptAssertEvent { + static OmptAssertEvent AssertionSyncPoint(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + const std::string &SyncPointName); + + static OmptAssertEvent AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType); + + static OmptAssertEvent ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads); + + static OmptAssertEvent ParallelEnd( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *EncounteringTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + uint64_t Count = expectedDefault(uint64_t), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_dispatch_t Kind = expectedDefault(ompt_dispatch_t), + ompt_data_t Instance = expectedDefault(ompt_data_t)); + + static OmptAssertEvent + TaskCreate(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t 
*EncounteringTaskData = expectedDefault(ompt_data_t *), + const ompt_frame_t *EncounteringTaskFrame = + expectedDefault(ompt_frame_t *), + ompt_data_t *NewTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + int HasDependences = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent + ImplicitTask(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + unsigned int ActualParallelism = expectedDefault(unsigned int), + unsigned int Index = expectedDefault(unsigned int), + int Flags = expectedDefault(int)); + + static OmptAssertEvent + SyncRegion(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = 
expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, ---------------- jprotze wrote: Why are there two of them with different argument orderings? https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 07:44:02 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 14 Jul 2025 07:44:02 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687517b2.170a0220.1b0558.6025@mx.google.com> https://github.com/jprotze edited https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Mon Jul 14 14:51:01 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Mon, 14 Jul 2025 14:51:01 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <68757bc5.170a0220.301918.9ca6@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 1/9] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * Implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. 
* It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup. --- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes.
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 2/9] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 3/9] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 4/9] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 5/9] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } >From fd69d49e8509161925d03015e5706c36a47b64b2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:17:47 +0200 Subject: [PATCH 6/9] remove unnecessary conditions --- offload/libomptarget/OpenMP/InteropAPI.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 57be23f10d24d..fa6325333c606 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -286,9 +286,6 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { @@ -322,9 +319,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) DP("Warning: nowait flag on interop destroy not supported " @@ -348,11 +342,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - if (!Interop) { - DP("Call to %s with invalid interop\n", __func__); - return omp_irc_empty; - } - Interop->addCompletionCb(cb, data); return omp_irc_success; >From fbca38468cd004afb311d5066abcc3c5ca96392e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:52:30 +0200 Subject: [PATCH 7/9] another corner case when unloading --- offload/libomptarget/PluginManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 2cc1314e7a4f0..f5d913f2b8909 100644 --- 
a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -132,7 +132,8 @@ void PluginManager::initializeAllDevices() { std::atexit([]() { // Interop cleanup should be done before the plugins are deinitialized as // the backend libraries may be already unloaded. - PM->InteropTbl.clear(); + if (PM) + PM->InteropTbl.clear(); }); } >From f5715cdccdbcf60f5ac81d93bff2c08059ef5dd2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 19:50:01 +0200 Subject: [PATCH 8/9] make version 32bits to simplify codegen --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 61cbedf06a9a6..3662d221e4bd0 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -51,7 +51,7 @@ struct interop_flags_t { }; struct interop_ctx_t { - uint16_t version; // version of the interface (current is 0) + uint32_t version; // version of the interface (current is 0) interop_flags_t flags; int gtid; }; >From 82fa72d175aa98a8983cc0365756aaa61a51a9c3 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 23:50:42 +0200 Subject: [PATCH 9/9] Fix sporadic race condition with helper threads on deinit --- offload/include/PluginManager.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 ++++++ offload/libomptarget/OpenMP/API.cpp | 9 ++++++++- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index ea1f3b6406ce7..6c6fdebe76dff 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -191,4 +191,5 @@ void deinitRuntime(); extern PluginManager *PM; extern std::atomic RTLAlive; // Indicates if the RTL has been initialized +extern std::atomic RTLOngoingSyncs; // Counts ongoing external syncs #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git 
a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 134ab7c95ac0b..04bd21ec91a49 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -23,6 +23,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; std::atomic RTLAlive{false}; +std::atomic RTLOngoingSyncs{0}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -56,6 +57,11 @@ void deinitRuntime() { DP("Deinit offload library!\n"); // RTL deinitialization has started RTLAlive = false; + while (RTLOngoingSyncs > 0) { + DP("Waiting for ongoing syncs to finish, count: %d\n", + RTLOngoingSyncs.load()); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index f61f56772504b..bffb92722a057 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -689,9 +689,16 @@ void syncImplicitInterops(int gtid, void *event); // ...) so we can synchronize the necessary objects from the offload side. EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, void *event) { - if (!RTLAlive) return; + RTLOngoingSyncs++; + if (!RTLAlive) { + RTLOngoingSyncs--; + return; + } + syncImplicitInterops(gtid, event); + + RTLOngoingSyncs--; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index fa6325333c606..b174ec487a93a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -289,8 +289,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported - "yet. 
Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. Ignored\n"); nowait = false; } From openmp-commits at lists.llvm.org Tue Jul 15 04:40:04 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 04:40:04 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68763e14.170a0220.1cd1dd.b639@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) ---------------- mhalk wrote: Good point, also noted by @jprotze. No, not necessarily. I *think* with the extended scope of host & device, we might switch to `LIBOMP_OMPT_SUPPORT` as indirectly suggested by Joachim. WDYT? Do we need to actively "hide" some device-related functionalities when `LIBOMP_OMPT_SUPPORT=ON` and `LIBOMPTARGET_OMPT_SUPPORT=OFF`? 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 04:51:03 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 04:51:03 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687640a7.170a0220.11d4b5.c2c7@mx.google.com> ================ @@ -0,0 +1,29 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package. +## +################################################################################ + +# Compute installation prefix relative to this file. +get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) ---------------- mhalk wrote: Also pointed out by @jprotze: Yes, we need this same statement six times; it acts as "one directory level up". So this acts as `../../../../../../`, which in that case: returns the build-directory root. Since I was also slightly confused when I saw this in `LLVMConfig.cmake`, I will see if there's a better way. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:04:00 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 05:04:00 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687643b0.630a0220.101100.ea2b@mx.google.com> ================ @@ -0,0 +1,377 @@ +//===- OmptAssertEvent.h - Assertion event declarations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains assertion event constructors, for generally all observable events. +/// This includes user-generated events, like synchronization. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H + +#include "InternalEvent.h" +#include "omp-tools.h" + +#include +#include +#include +#include + +namespace omptest { + +enum class ObserveState { generated, always, never }; + +/// Helper function, returning an ObserveState string representation +const char *to_string(ObserveState State); + +/// Assertion event struct, provides statically callable CTORs. 
+struct OmptAssertEvent { + static OmptAssertEvent AssertionSyncPoint(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + const std::string &SyncPointName); + + static OmptAssertEvent AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType); + + static OmptAssertEvent ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads); + + static OmptAssertEvent ParallelEnd( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *EncounteringTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + uint64_t Count = expectedDefault(uint64_t), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_dispatch_t Kind = expectedDefault(ompt_dispatch_t), + ompt_data_t Instance = expectedDefault(ompt_data_t)); + + static OmptAssertEvent + TaskCreate(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t 
*EncounteringTaskData = expectedDefault(ompt_data_t *), + const ompt_frame_t *EncounteringTaskFrame = + expectedDefault(ompt_frame_t *), + ompt_data_t *NewTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + int HasDependences = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent + ImplicitTask(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + unsigned int ActualParallelism = expectedDefault(unsigned int), + unsigned int Index = expectedDefault(unsigned int), + int Flags = expectedDefault(int)); + + static OmptAssertEvent + SyncRegion(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = 
expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, ---------------- mhalk wrote: The first one (without default values) resembles the signature of the OpenMP spec. Additionally, we wanted to provide an "easy-to-use" CTOR, where you may just provide the `OpType` (as Name, Group, and Expected may be handled internally). Other arguments were reordered as reasonable as possible. https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:11:26 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 05:11:26 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <6876456e.050a0220.3b8c77.e4a8@mx.google.com> ================ @@ -0,0 +1,331 @@ +//===- InternalEvent.h - Internal event representation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declares internal event representations along the default CTOR definition. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H + +#include "InternalEventCommon.h" + +#include +#include +#include + +#define expectedDefault(TypeName) std::numeric_limits::min() + +namespace omptest { + +namespace util { + +/// String manipulation helper function. Takes up to 8 bytes of data and returns +/// their hexadecimal representation as string. 
The data can be expanded to the +/// given size in bytes and will by default be prefixed with '0x'. +std::string makeHexString(uint64_t Data, bool IsPointer = true, + size_t DataBytes = 0, bool ShowHexBase = true); + +} // namespace util + +namespace internal { +// clang-format off +event_class_w_custom_body(AssertionSyncPoint, \ ---------------- mhalk wrote: Understandable, I have no objection to expand these. https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:13:36 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 15 Jul 2025 05:13:36 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687645f0.170a0220.9ab8a.e8a0@mx.google.com> https://github.com/jprotze edited https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:14:13 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 15 Jul 2025 05:14:13 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68764615.170a0220.6fcd7.bc37@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." 
OFF) ---------------- jprotze wrote: Sorry, should be LIBOMP_OMPT_SUPPORT :) https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:14:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 15 Jul 2025 05:14:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68764620.630a0220.35f50b.ed8c@mx.google.com> https://github.com/jprotze edited https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:17:39 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 15 Jul 2025 05:17:39 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687646e3.170a0220.d3325.c570@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) ---------------- jprotze wrote: I guess the question is: can we write assertions that automatically adjust to the set of available callbacks? I.e., if the target callback is not supported, but occurs in a sequence, just ignore such callback. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 05:48:06 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 05:48:06 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68764e06.170a0220.313a89.bf7a@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) ---------------- mhalk wrote: Thinking about this I'd say: ompTest would try to register the callbacks, which fails, emits a print and continues: * PR: [openmp/tools/omptest/src/OmptTester.cpp](https://github.com/llvm/llvm-project/pull/147381/files/f024da45acd71e4dfe65d3d9cc6b86cf9cb7cc69#diff-5444ac7a0af0a21f1de08291d12c0050b446ae7648d6fea7a46128e324124353R32-R38) * Originally: [openmp/runtime/test/ompt/callback.h](https://github.com/llvm/llvm-project/blob/main/openmp/runtime/test/ompt/callback.h#L352-L357) But now that I re-read, you want the written assertions to be ignored in absence of the corresponding callback, correct? 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 08:19:31 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 15 Jul 2025 08:19:31 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68767183.170a0220.160eec.7ccd@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) ---------------- jprotze wrote: No, I mean, instead of OFF by default, use the value of `OPENMP_STANDALONE_BUILD` as the default for the option ```suggestion "Build ompTest 'standalone', i.e. w/o GoogleTest." ${OPENMP_STANDALONE_BUILD}) ``` If we build with llvm-project/openmp as the source directory, the option will automatically use the sane default. https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 08:20:57 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 15 Jul 2025 08:20:57 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687671d9.050a0220.41f97.5af4@mx.google.com> ================ @@ -0,0 +1,29 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package. +## +################################################################################ + +# Compute installation prefix relative to this file. 
+get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) ---------------- jprotze wrote: Just add a comment there explaining what it does :D https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 09:08:46 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 09:08:46 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68767d0e.170a0220.23ff9d.4c43@mx.google.com> ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) ---------------- mhalk wrote: Sounds good to me, I like it. 
https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 15 09:09:06 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 15 Jul 2025 09:09:06 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <68767d22.630a0220.2a28de.5f5e@mx.google.com> https://github.com/mhalk updated https://github.com/llvm/llvm-project/pull/147381 >From f024da45acd71e4dfe65d3d9cc6b86cf9cb7cc69 Mon Sep 17 00:00:00 2001 From: Michael Halkenhaeuser Date: Wed, 2 Jul 2025 05:32:04 -0500 Subject: [PATCH 1/2] [OpenMP] Add ompTest library to OpenMP Description =========== OpenMP Tooling Interface Testing Library (ompTest) ompTest is a unit testing framework for testing OpenMP implementations. It offers a simple-to-use framework that allows a tester to check for OMPT events in addition to regular unit testing code, supported by linking against GoogleTest by default. It also facilitates writing concise tests while bridging the semantic gap between the unit under test and the OMPT-event testing. Background ========== This library has been developed to provide the means of testing OMPT implementations with reasonable effort. Especially, asynchronous or unordered events are supported and can be verified with ease, which may prove to be challenging with LIT-based tests. Additionally, since the assertions are part of the code being tested, ompTest can reference all corresponding variables during assertion. Basic Usage =========== OMPT event assertions are placed before the code, which shall be tested. These assertions can either be provided as one block or interleaved with the test code. There are two types of asserters: (1) sequenced "order-sensitive" and (2) set "unordered" asserters. Once the test is being run, the corresponding events are triggered by the OpenMP runtime and can be observed.
Each of these observed events notifies asserters, which then determine if the test should pass or fail. Example (partial, interleaved) ============================== int N = 100000; int a[N]; int b[N]; OMPT_ASSERT_SEQUENCE(Target, TARGET, BEGIN, 0); OMPT_ASSERT_SEQUENCE(TargetDataOp, ALLOC, N * sizeof(int)); // a ? OMPT_ASSERT_SEQUENCE(TargetDataOp, H2D, N * sizeof(int), &a); OMPT_ASSERT_SEQUENCE(TargetDataOp, ALLOC, N * sizeof(int)); // b ? OMPT_ASSERT_SEQUENCE(TargetDataOp, H2D, N * sizeof(int), &b); OMPT_ASSERT_SEQUENCE(TargetSubmit, 1); OMPT_ASSERT_SEQUENCE(TargetDataOp, D2H, N * sizeof(int), nullptr, &b); OMPT_ASSERT_SEQUENCE(TargetDataOp, D2H, N * sizeof(int), nullptr, &a); OMPT_ASSERT_SEQUENCE(TargetDataOp, DELETE); OMPT_ASSERT_SEQUENCE(TargetDataOp, DELETE); OMPT_ASSERT_SEQUENCE(Target, TARGET, END, 0); { for (int j = 0; j < N; j++) a[j] = b[j]; } References ========== This work has been presented at SC'24 workshops, see: https://ieeexplore.ieee.org/document/10820689 Current State and Future Work ============================= ompTest's development was mostly device-centric and aimed at OMPT device callbacks and device-side tracing. Consequentially, a substantial part of host-related events or features may not be supported in its current state. However, we are confident that the related functionality can be added and ompTest provides a general foundation for future OpenMP and especially OMPT testing. This PR will allow us to upstream the corresponding features, like OMPT device-side tracing in the future with significantly reduced risk of introducing regressions in the process. Build ===== ompTest is linked against LLVM's GoogleTest by default, but can also be built 'standalone'. Additionally, it comes with a set of unit tests, which in turn require GoogleTest (overriding a standalone build). The unit tests are added to the `check-openmp` target. 
Use the following parameters to perform the corresponding build: `LIBOMPTEST_BUILD_STANDALONE` (Default: OFF) `LIBOMPTEST_BUILD_UNITTESTS` (Default: OFF) --------- Co-authored-by: Jan-Patrick Lehr --- openmp/README.rst | 1 + openmp/tools/omptest/CMakeLists.txt | 116 ++++ openmp/tools/omptest/README.md | 279 +++++++++ .../omptest/cmake/omptest-config.cmake.in | 29 + openmp/tools/omptest/include/AssertMacros.h | 138 ++++ openmp/tools/omptest/include/InternalEvent.h | 331 ++++++++++ .../omptest/include/InternalEventCommon.h | 133 ++++ openmp/tools/omptest/include/Logging.h | 155 +++++ openmp/tools/omptest/include/OmptAliases.h | 85 +++ .../tools/omptest/include/OmptAssertEvent.h | 377 +++++++++++ openmp/tools/omptest/include/OmptAsserter.h | 291 +++++++++ .../omptest/include/OmptCallbackHandler.h | 165 +++++ openmp/tools/omptest/include/OmptTester.h | 60 ++ .../tools/omptest/include/OmptTesterGlobals.h | 36 ++ .../omptest/include/OmptTesterGoogleTest.h | 86 +++ .../omptest/include/OmptTesterStandalone.h | 123 ++++ openmp/tools/omptest/src/InternalEvent.cpp | 367 +++++++++++ .../omptest/src/InternalEventOperators.cpp | 366 +++++++++++ openmp/tools/omptest/src/Logging.cpp | 177 ++++++ openmp/tools/omptest/src/OmptAssertEvent.cpp | 587 ++++++++++++++++++ openmp/tools/omptest/src/OmptAsserter.cpp | 480 ++++++++++++++ .../tools/omptest/src/OmptCallbackHandler.cpp | 445 +++++++++++++ openmp/tools/omptest/src/OmptTester.cpp | 504 +++++++++++++++ .../omptest/src/OmptTesterStandalone.cpp | 147 +++++ openmp/tools/omptest/test/CMakeLists.txt | 28 + openmp/tools/omptest/test/lit.cfg | 26 + openmp/tools/omptest/test/lit.site.cfg.in | 9 + .../test/unittests/asserter-seq-test.cpp | 358 +++++++++++ .../test/unittests/internal-event-test.cpp | 530 ++++++++++++++++ .../test/unittests/internal-util-test.cpp | 95 +++ .../omptest/test/unittests/main-test.cpp | 141 +++++ 31 files changed, 6665 insertions(+) create mode 100644 openmp/tools/omptest/CMakeLists.txt create mode 100644 
openmp/tools/omptest/README.md create mode 100644 openmp/tools/omptest/cmake/omptest-config.cmake.in create mode 100644 openmp/tools/omptest/include/AssertMacros.h create mode 100644 openmp/tools/omptest/include/InternalEvent.h create mode 100644 openmp/tools/omptest/include/InternalEventCommon.h create mode 100644 openmp/tools/omptest/include/Logging.h create mode 100644 openmp/tools/omptest/include/OmptAliases.h create mode 100644 openmp/tools/omptest/include/OmptAssertEvent.h create mode 100644 openmp/tools/omptest/include/OmptAsserter.h create mode 100644 openmp/tools/omptest/include/OmptCallbackHandler.h create mode 100644 openmp/tools/omptest/include/OmptTester.h create mode 100644 openmp/tools/omptest/include/OmptTesterGlobals.h create mode 100644 openmp/tools/omptest/include/OmptTesterGoogleTest.h create mode 100644 openmp/tools/omptest/include/OmptTesterStandalone.h create mode 100644 openmp/tools/omptest/src/InternalEvent.cpp create mode 100644 openmp/tools/omptest/src/InternalEventOperators.cpp create mode 100644 openmp/tools/omptest/src/Logging.cpp create mode 100644 openmp/tools/omptest/src/OmptAssertEvent.cpp create mode 100644 openmp/tools/omptest/src/OmptAsserter.cpp create mode 100644 openmp/tools/omptest/src/OmptCallbackHandler.cpp create mode 100644 openmp/tools/omptest/src/OmptTester.cpp create mode 100644 openmp/tools/omptest/src/OmptTesterStandalone.cpp create mode 100644 openmp/tools/omptest/test/CMakeLists.txt create mode 100644 openmp/tools/omptest/test/lit.cfg create mode 100644 openmp/tools/omptest/test/lit.site.cfg.in create mode 100644 openmp/tools/omptest/test/unittests/asserter-seq-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/internal-event-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/internal-util-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/main-test.cpp diff --git a/openmp/README.rst b/openmp/README.rst index 2dfc8630858b8..c34d3e8a40d7d 100644 --- a/openmp/README.rst +++ 
b/openmp/README.rst @@ -369,6 +369,7 @@ There are following check-* make targets for tests. - ``check-ompt`` (ompt tests under runtime/test/ompt) - ``check-ompt-multiplex`` (ompt multiplex tests under tools/multiplex/tests) +- ``check-ompt-omptest`` (ompt omptest tests under tools/omptest/tests) - ``check-libarcher`` (libarcher tests under tools/archer/tests) - ``check-libomp`` (libomp tests under runtime/test. This includes check-ompt tests too) - ``check-libomptarget-*`` (libomptarget tests for specific target under libomptarget/test) diff --git a/openmp/tools/omptest/CMakeLists.txt b/openmp/tools/omptest/CMakeLists.txt new file mode 100644 index 0000000000000..19f9f898f4300 --- /dev/null +++ b/openmp/tools/omptest/CMakeLists.txt @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." 
OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) + return() +endif() + +set(OMPTEST_HEADERS + ./include/AssertMacros.h + ./include/InternalEvent.h + ./include/InternalEventCommon.h + ./include/Logging.h + ./include/OmptAliases.h + ./include/OmptAsserter.h + ./include/OmptAssertEvent.h + ./include/OmptCallbackHandler.h + ./include/OmptTester.h + ./include/OmptTesterGlobals.h +) + +add_library(omptest + SHARED + + ${OMPTEST_HEADERS} + ./src/InternalEvent.cpp + ./src/InternalEventOperators.cpp + ./src/Logging.cpp + ./src/OmptAsserter.cpp + ./src/OmptAssertEvent.cpp + ./src/OmptCallbackHandler.cpp + ./src/OmptTester.cpp +) + +# Target: ompTest library +# On (implicit) request of GoogleTest, link against the one provided with LLVM. +if ((NOT LIBOMPTEST_BUILD_STANDALONE) OR LIBOMPTEST_BUILD_UNITTESTS) + # Check if standalone build was requested together with unittests + if (LIBOMPTEST_BUILD_STANDALONE) + # Emit warning: this build actually depends on LLVM's GoogleTest + message(WARNING "LIBOMPTEST_BUILD_STANDALONE and LIBOMPTEST_BUILD_UNITTESTS" + " requested simultaneously.\n" + "Linking against LLVM's GoogleTest library archives.\n" + "Disable LIBOMPTEST_BUILD_UNITTESTS to perform an actual" + " standalone build.") + # Explicitly disable LIBOMPTEST_BUILD_STANDALONE + set(LIBOMPTEST_BUILD_STANDALONE OFF) + endif() + + # Use LLVM's gtest library archive + set(GTEST_LIB "${LLVM_BINARY_DIR}/lib/libllvm_gtest.a") + # Link gtest as whole-archive to expose required symbols + set(GTEST_LINK_CMD "-Wl,--whole-archive" ${GTEST_LIB} + "-Wl,--no-whole-archive" LLVMSupport) + + # Add GoogleTest-based header + target_sources(omptest PRIVATE ./include/OmptTesterGoogleTest.h) + + # Add LLVM-provided GoogleTest include directories. + target_include_directories(omptest PRIVATE + ${LLVM_THIRD_PARTY_DIR}/unittest/googletest/include) + + # TODO: Re-visit ABI breaking checks, disable for now. 
+ target_compile_definitions(omptest PUBLIC + -DLLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING) + + # Link against gtest and gtest_main + target_link_libraries(omptest PRIVATE ${GTEST_LINK_CMD}) +else() + # Add 'standalone' compile definitions + target_compile_definitions(omptest PRIVATE + -DOPENMP_LIBOMPTEST_BUILD_STANDALONE) + + # Add 'standalone' source files + target_sources(omptest PRIVATE + ./include/OmptTesterStandalone.h + ./src/OmptTesterStandalone.cpp) +endif() + +# Add common include directories. +target_include_directories(omptest PRIVATE + ./include + ${LIBOMPTARGET_INCLUDE_DIR}) +target_compile_features(omptest PRIVATE cxx_std_17) + +# Create and install package configuration files. +configure_file( + ${omptest_SOURCE_DIR}/cmake/omptest-config.cmake.in + ${omptest_BINARY_DIR}/cmake/omptest-config.cmake @ONLY) + +install(FILES ${omptest_BINARY_DIR}/cmake/omptest-config.cmake + DESTINATION "${OPENMP_INSTALL_LIBDIR}/cmake/openmp/omptest") + +# Install libomptest header files: Copy header-files from include dir +install(DIRECTORY ./include + DESTINATION "${LIBOMP_HEADERS_INSTALL_PATH}/omptest" + FILES_MATCHING PATTERN "*.h") + +install(TARGETS omptest LIBRARY COMPONENT omptest + DESTINATION "${OPENMP_INSTALL_LIBDIR}") + +# Discover unit tests (added to check-openmp) +if(LIBOMPTEST_BUILD_UNITTESTS) + add_subdirectory(test) +endif() diff --git a/openmp/tools/omptest/README.md b/openmp/tools/omptest/README.md new file mode 100644 index 0000000000000..bfed871b59bdb --- /dev/null +++ b/openmp/tools/omptest/README.md @@ -0,0 +1,279 @@ + +README for the OpenMP Tooling Interface Testing Library (ompTest) +================================================================= + +# Introduction +OpenMP Tooling Interface Testing Library (ompTest) +ompTest is a unit testing framework for testing OpenMP implementations. 
+It offers a simple-to-use framework that allows a tester to check for OMPT +events in addition to regular unit testing code, supported by linking against +GoogleTest by default. It also facilitates writing concise tests while bridging +the semantic gap between the unit under test and the OMPT-event testing. + +# Testing macros + +Corresponding macro definitions are located in: `./include/AssertMacros.h` + +## OMPT_GENERATE_EVENTS(NumberOfCopies, EventMacro) +`TODO` + +## OMPT_ASSERT_SET_EVENT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_GROUPED(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NAMED(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_EVENT_NOT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NOT(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_GROUPED_NOT(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NAMED_NOT(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_EVENT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE(EventTy, ...) +This macro checks for the occurrence of the provided event, which also +entails the exact sequence of events. When only using this assertion macro one +has to provide every single event in the exact order of occurrence. + +## OMPT_ASSERT_SEQUENCE_GROUPED(Group, EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE` with the addition of grouping. + +## OMPT_ASSERT_SEQUENCE_NAMED(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_NOT(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_GROUPED_NOT(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_NAMED_NOT(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_SUSPEND() +`TODO` + +## OMPT_ASSERT_SEQUENCE_ONLY(EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE`, while actually being preceded +-AND- succeeded by commands to suspend sequenced assertion until the next match. 
+As a result, one may omit all other "unnecessary" events from the sequence. + +## OMPT_ASSERT_SEQUENCE_GROUPED_ONLY(Group, EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE_ONLY`, plus grouping. + +## OMPT_ASSERT_SEQUENCE_NAMED_ONLY(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERTER_MODE_STRICT(Asserter) +`TODO` + +## OMPT_ASSERTER_MODE_RELAXED(Asserter) +`TODO` + +## OMPT_ASSERT_SEQUENCE_MODE_STRICT() +`TODO` + +## OMPT_ASSERT_SEQUENCE_MODE_RELAXED() +`TODO` + +## OMPT_ASSERT_SET_MODE_STRICT() +`TODO` + +## OMPT_ASSERT_SET_MODE_RELAXED() +`TODO` + +## OMPT_ASSERTER_DISABLE(Asserter) +`TODO` + +## OMPT_ASSERTER_ENABLE(Asserter) +`TODO` + +## OMPT_ASSERT_SET_DISABLE() +`TODO` + +## OMPT_ASSERT_SET_ENABLE() +`TODO` + +## OMPT_ASSERT_SEQUENCE_DISABLE() +`TODO` + +## OMPT_ASSERT_SEQUENCE_ENABLE() +`TODO` + +## OMPT_REPORT_EVENT_DISABLE() +`TODO` + +## OMPT_REPORT_EVENT_ENABLE() +`TODO` + +## OMPT_ASSERTER_PERMIT_EVENT(Asserter, EventTy) +`TODO` + +## OMPT_ASSERTER_SUPPRESS_EVENT(Asserter, EventTy) +`TODO` + +## OMPT_PERMIT_EVENT(EventTy) +`TODO` + +## OMPT_SUPPRESS_EVENT(EventTy) +`TODO` + +## OMPT_ASSERTER_LOG_LEVEL(Asserter, LogLevel) +`TODO` + +## OMPT_ASSERTER_LOG_FORMATTED(Asserter, FormatLog) +`TODO` + +## OMPT_ASSERT_SYNC_POINT(SyncPointName) +`TODO` + +### Grouping Asserts + +This allows generating and verifying data during runtime of a test. +Currently, we only use target region information which manifests into groups. +This allows correlating multiple events to a certain target region without +manual interaction just by specifying a groupname for these events. + +When a target region is encountered and we are about to enter it, we gather the +`target_id` (non-EMI) -OR- `target_data->value` (EMI). This value is stored +along with the groupname for future reference. Upon target region end, the +corresponding group is erased. (Note: The groupname is available again.)
+ +Other asserted callbacks which may occur within target regions query their +groupname: retrieving and comparing the value of the group against the observed +event's value. + +### Suspending Sequenced Asserts + +When a sequence of events is not of interest while testing, these additional +events may be ignored by suspending the assertion until the next match. This +can be done by using `OMPT_ASSERT_SEQUENCE_SUSPEND` manually or the `_ONLY` +macro variants, like `OMPT_ASSERT_SEQUENCE_GROUPED_ONLY`. + +The former adds a special event to the queue of expected events and signals +that any non-matching event should be ignored rather than failing the test. +`_ONLY` macros embed their corresponding macro between two calls to +`OMPT_ASSERT_SEQUENCE_SUSPEND`. As a consequence, we enter passive assertion +until a match occurs, then enter passive assertion again. This enables us to +"only" assert a certain, single event in arbitrary circumstances. + +### Asserter Modes +`TODO` + +## Aliases (shorthands) +To allow for easier writing of tests and enhanced readability, the following set +of aliases is introduced. The left hand side represents the original value, +while the right hand side depicts the shorthand version.
+ +| Type | Enum Value | Shorthand | +|---------------------------|---------------------------------------------|---------------------------| +| **ompt_scope_endpoint_t** | | | +| | ompt_scope_begin | BEGIN | +| | ompt_scope_end | END | +| | ompt_scope_beginend | BEGINEND | +| **ompt_target_t** | | | +| | ompt_target | TARGET | +| | ompt_target_enter_data | ENTER_DATA | +| | ompt_target_exit_data | EXIT_DATA | +| | ompt_target_update | UPDATE | +| | ompt_target_nowait | TARGET_NOWAIT | +| | ompt_target_enter_data_nowait | ENTER_DATA_NOWAIT | +| | ompt_target_exit_data_nowait | EXIT_DATA_NOWAIT | +| | ompt_target_update_nowait | UPDATE_NOWAIT | +| **ompt_target_data_op_t** | | | +| | ompt_target_data_alloc | ALLOC | +| | ompt_target_data_transfer_to_device | H2D | +| | ompt_target_data_transfer_from_device | D2H | +| | ompt_target_data_delete | DELETE | +| | ompt_target_data_associate | ASSOCIATE | +| | ompt_target_data_disassociate | DISASSOCIATE | +| | ompt_target_data_alloc_async | ALLOC_ASYNC | +| | ompt_target_data_transfer_to_device_async | H2D_ASYNC | +| | ompt_target_data_transfer_from_device_async | D2H_ASYNC | +| | ompt_target_data_delete_async | DELETE_ASYNC | +| **ompt_callbacks_t** | | | +| | ompt_callback_target | CB_TARGET | +| | ompt_callback_target_data_op | CB_DATAOP | +| | ompt_callback_target_submit | CB_KERNEL | +| **ompt_work_t** | | | +| | ompt_work_loop | WORK_LOOP | +| | ompt_work_sections | WORK_SECT | +| | ompt_work_single_executor | WORK_EXEC | +| | ompt_work_single_other | WORK_SINGLE | +| | ompt_work_workshare | WORK_SHARE | +| | ompt_work_distribute | WORK_DIST | +| | ompt_work_taskloop | WORK_TASK | +| | ompt_work_scope | WORK_SCOPE | +| | ompt_work_loop_static | WORK_LOOP_STA | +| | ompt_work_loop_dynamic | WORK_LOOP_DYN | +| | ompt_work_loop_guided | WORK_LOOP_GUI | +| | ompt_work_loop_other | WORK_LOOP_OTH | +| **ompt_sync_region_t** | | | +| | ompt_sync_region_barrier | SR_BARRIER | +| | ompt_sync_region_barrier_implicit | 
SR_BARRIER_IMPL | +| | ompt_sync_region_barrier_explicit | SR_BARRIER_EXPL | +| | ompt_sync_region_barrier_implementation | SR_BARRIER_IMPLEMENTATION | +| | ompt_sync_region_taskwait | SR_TASKWAIT | +| | ompt_sync_region_taskgroup | SR_TASKGROUP | +| | ompt_sync_region_reduction | SR_REDUCTION | +| | ompt_sync_region_barrier_implicit_workshare | SR_BARRIER_IMPL_WORKSHARE | +| | ompt_sync_region_barrier_implicit_parallel | SR_BARRIER_IMPL_PARALLEL | +| | ompt_sync_region_barrier_teams | SR_BARRIER_TEAMS | + + +Limitations +=========== +Currently, there are some peculiarities which have to be kept in mind when using +this library: + +## Callbacks + * It is not possible to e.g. test non-EMI -AND- EMI callbacks within the same + test file. Reason: all testsuites share the initialization and therefore the + registered callbacks. + * It is not possible to check for device initialization and/or load callbacks + more than once per test file. The first testcase being run triggers these + callbacks and is therefore the only testcase that is able to check for them. + This is because, after that, the device remains initialized. + * It is not possible to check for device finalization callbacks, as libomptest + is unloaded before this callback occurs. Same holds true for the final + ThreadEnd event(s). + +Miscellaneous +============= + +## Default values + +To allow for easier writing of tests, many OMPT events may be created using fewer +parameters than actually requested by the spec -- by using default values. These +defaults are currently set to the corresponding data type's minimum as follows, +for example integers use: `std::numeric_limits<int>::min()`. + +When an expected / user-specified event has certain values set to the +corresponding default, these values are ignored. That is, when compared to an +observed event, this property is considered as 'equal' regardless of their +actual equality relation.
+ +References +========== +[0]: ompTest – Unit Testing with OMPT + https://doi.org/10.1109/SCW63240.2024.00031 + +[1]: OMPTBench – OpenMP Tool Interface Conformance Testing + https://doi.org/10.1109/SCW63240.2024.00036 diff --git a/openmp/tools/omptest/cmake/omptest-config.cmake.in b/openmp/tools/omptest/cmake/omptest-config.cmake.in new file mode 100644 index 0000000000000..dca02505539b0 --- /dev/null +++ b/openmp/tools/omptest/cmake/omptest-config.cmake.in @@ -0,0 +1,29 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package. +## +################################################################################ + +# Compute installation prefix relative to this file. +get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) + +# Provide header and library paths. +set(LIBOMP_HEADERS_INSTALL_PATH "${LLVM_INSTALL_PREFIX}/@LIBOMP_HEADERS_INSTALL_PATH@") +set(LIBOMP_LIBRARY_INSTALL_PATH "${LLVM_INSTALL_PREFIX}/@OPENMP_INSTALL_LIBDIR@") +set(omptest_INCLUDE_DIR "${LIBOMP_HEADERS_INSTALL_PATH}/omptest/include") +set(omptest_LIBRARY_DIR "${LIBOMP_LIBRARY_INSTALL_PATH}") + +# Provide compiler default values. +set(LLVM_BIN_INSTALL_DIR "${LLVM_INSTALL_PREFIX}/bin") +set(omptest_C_COMPILER "${LLVM_BIN_INSTALL_DIR}/clang") +set(omptest_CXX_COMPILER "${LLVM_BIN_INSTALL_DIR}/clang++") + +# Provide information, if ompTest has been built 'standalone'. 
+set(LIBOMPTEST_BUILD_STANDALONE "@LIBOMPTEST_BUILD_STANDALONE@") diff --git a/openmp/tools/omptest/include/AssertMacros.h b/openmp/tools/omptest/include/AssertMacros.h new file mode 100644 index 0000000000000..d5d191c10dabb --- /dev/null +++ b/openmp/tools/omptest/include/AssertMacros.h @@ -0,0 +1,138 @@ +//===- AssertMacros.h - Macro aliases for ease-of-use -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides macros to be used in unit tests for OMPT events. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_ASSERTMACROS_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_ASSERTMACROS_H + +#define OMPTEST_EXCLUDED_EVENT omptest::ObserveState::never +#define OMPTEST_REQUIRED_EVENT omptest::ObserveState::always + +/// ASSERT MACROS TO BE USED BY THE USER + +#define OMPT_GENERATE_EVENTS(NumberOfCopies, EventMacro) \ + for (size_t i = 0; i < NumberOfCopies; ++i) { \ + EventMacro \ + } + +// Handle a minimum unordered set of events +// Required events +#define OMPT_ASSERT_SET_EVENT(Name, Group, EventTy, ...) \ + SetAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_REQUIRED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SET(EventTy, ...) \ + OMPT_ASSERT_SET_EVENT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_GROUPED(Group, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_NAMED(Name, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT(Name, "", EventTy, __VA_ARGS__) +// Excluded ("NOT") events +#define OMPT_ASSERT_SET_EVENT_NOT(Name, Group, EventTy, ...) 
\ + SetAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_EXCLUDED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SET_NOT(EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_GROUPED_NOT(Group, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_NAMED_NOT(Name, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT(Name, "", EventTy, __VA_ARGS__) + +// Handle an exact sequence of events +// Required events +#define OMPT_ASSERT_SEQUENCE_EVENT(Name, Group, EventTy, ...) \ + SequenceAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_REQUIRED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SEQUENCE(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_GROUPED(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_NAMED(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT(Name, "", EventTy, __VA_ARGS__) +// Excluded ("NOT") events +#define OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, Group, EventTy, ...) \ + SequenceAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_EXCLUDED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SEQUENCE_NOT(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_GROUPED_NOT(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_NAMED_NOT(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, "", EventTy, __VA_ARGS__) +// Special command: suspend active assertion +// The created event is not correlated to any observed event +#define OMPT_ASSERT_SEQUENCE_SUSPEND() \ + SequenceAsserter->insert( \ + OmptAssertEvent::AssertionSuspend("", "", OMPTEST_EXCLUDED_EVENT)); +#define OMPT_ASSERT_SEQUENCE_ONLY(EventTy, ...) 
\ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT("", "", EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() +#define OMPT_ASSERT_SEQUENCE_GROUPED_ONLY(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT("", Group, EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() +#define OMPT_ASSERT_SEQUENCE_NAMED_ONLY(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT(Name, "", EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() + +#define OMPT_ASSERTER_MODE_STRICT(Asserter) \ + Asserter->setOperationMode(AssertMode::strict); +#define OMPT_ASSERTER_MODE_RELAXED(Asserter) \ + Asserter->setOperationMode(AssertMode::relaxed); +#define OMPT_ASSERT_SEQUENCE_MODE_STRICT() \ + OMPT_ASSERTER_MODE_STRICT(SequenceAsserter) +#define OMPT_ASSERT_SEQUENCE_MODE_RELAXED() \ + OMPT_ASSERTER_MODE_RELAXED(SequenceAsserter) +#define OMPT_ASSERT_SET_MODE_STRICT() OMPT_ASSERTER_MODE_STRICT(SetAsserter) +#define OMPT_ASSERT_SET_MODE_RELAXED() OMPT_ASSERTER_MODE_RELAXED(SetAsserter) + +// Enable / disable asserters entirely +#define OMPT_ASSERTER_DISABLE(Asserter) Asserter->setActive(false); +#define OMPT_ASSERTER_ENABLE(Asserter) Asserter->setActive(true); +#define OMPT_ASSERT_SET_DISABLE() OMPT_ASSERTER_DISABLE(SetAsserter) +#define OMPT_ASSERT_SET_ENABLE() OMPT_ASSERTER_ENABLE(SetAsserter) +#define OMPT_ASSERT_SEQUENCE_DISABLE() OMPT_ASSERTER_DISABLE(SequenceAsserter) +#define OMPT_ASSERT_SEQUENCE_ENABLE() OMPT_ASSERTER_ENABLE(SequenceAsserter) +#define OMPT_REPORT_EVENT_DISABLE() OMPT_ASSERTER_DISABLE(EventReporter) +#define OMPT_REPORT_EVENT_ENABLE() OMPT_ASSERTER_ENABLE(EventReporter) + +// Enable / disable certain event types for asserters +#define OMPT_ASSERTER_PERMIT_EVENT(Asserter, EventTy) \ + Asserter->permitEvent(EventTy); +#define OMPT_ASSERTER_SUPPRESS_EVENT(Asserter, EventTy) \ + Asserter->suppressEvent(EventTy); +#define OMPT_PERMIT_EVENT(EventTy) \ + 
OMPT_ASSERTER_PERMIT_EVENT(SetAsserter, EventTy); \ + OMPT_ASSERTER_PERMIT_EVENT(EventReporter, EventTy); \ + OMPT_ASSERTER_PERMIT_EVENT(SequenceAsserter, EventTy); +#define OMPT_SUPPRESS_EVENT(EventTy) \ + OMPT_ASSERTER_SUPPRESS_EVENT(SetAsserter, EventTy); \ + OMPT_ASSERTER_SUPPRESS_EVENT(EventReporter, EventTy); \ + OMPT_ASSERTER_SUPPRESS_EVENT(SequenceAsserter, EventTy); + +// Set logging level for asserters +// Note: Logger is a singleton, hence this will affect all asserter instances +#define OMPT_ASSERTER_LOG_LEVEL(Asserter, LogLevel) \ + Asserter->getLog()->setLoggingLevel(LogLevel); + +// Set log formatting (esp. coloring) for asserters +// Note: Logger is a singleton, hence this will affect all asserter instances +#define OMPT_ASSERTER_LOG_FORMATTED(Asserter, FormatLog) \ + Asserter->getLog()->setFormatOutput(FormatLog); + +// SyncPoint handling +#define OMPT_ASSERT_SYNC_POINT(SyncPointName) \ + flush_traced_devices(); \ + OmptCallbackHandler::get().handleAssertionSyncPoint(SyncPointName); + +#endif diff --git a/openmp/tools/omptest/include/InternalEvent.h b/openmp/tools/omptest/include/InternalEvent.h new file mode 100644 index 0000000000000..455d8d996e5f4 --- /dev/null +++ b/openmp/tools/omptest/include/InternalEvent.h @@ -0,0 +1,331 @@ +//===- InternalEvent.h - Internal event representation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declares internal event representations along the default CTOR definition. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H + +#include "InternalEventCommon.h" + +#include +#include +#include + +#define expectedDefault(TypeName) std::numeric_limits::min() + +namespace omptest { + +namespace util { + +/// String manipulation helper function. Takes up to 8 bytes of data and returns +/// their hexadecimal representation as string. The data can be expanded to the +/// given size in bytes and will by default be prefixed with '0x'. +std::string makeHexString(uint64_t Data, bool IsPointer = true, + size_t DataBytes = 0, bool ShowHexBase = true); + +} // namespace util + +namespace internal { +// clang-format off +event_class_w_custom_body(AssertionSyncPoint, \ + AssertionSyncPoint(const std::string &Name) \ + : InternalEvent(EventTy::AssertionSyncPoint), Name(Name) {} \ + \ + const std::string Name; \ +) +event_class_stub(AssertionSuspend) +event_class_w_custom_body(ThreadBegin, \ + ThreadBegin(ompt_thread_t ThreadType) \ + : InternalEvent(EventTy::ThreadBegin), ThreadType(ThreadType) {} \ + \ + ompt_thread_t ThreadType; \ +) +event_class_w_custom_body(ThreadEnd, \ + ThreadEnd() : InternalEvent(EventTy::ThreadEnd) {} \ +) +event_class_w_custom_body(ParallelBegin, \ + ParallelBegin(int NumThreads) \ + : InternalEvent(EventTy::ParallelBegin), NumThreads(NumThreads) {} \ + \ + unsigned int NumThreads; \ +) +event_class_w_custom_body(ParallelEnd, \ + ParallelEnd(ompt_data_t *ParallelData, ompt_data_t *EncounteringTaskData, \ + int Flags, const void *CodeptrRA) \ + : InternalEvent(EventTy::ParallelEnd), ParallelData(ParallelData), \ + EncounteringTaskData(EncounteringTaskData), Flags(Flags), \ + CodeptrRA(CodeptrRA) {} \ + \ +ompt_data_t *ParallelData; \ +ompt_data_t *EncounteringTaskData; \ +int Flags; \ +const void *CodeptrRA; \ +) +event_class_w_custom_body(Work, \ + Work(ompt_work_t WorkType, 
ompt_scope_endpoint_t Endpoint, \ + ompt_data_t *ParallelData, ompt_data_t *TaskData, uint64_t Count, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::Work), WorkType(WorkType), Endpoint(Endpoint), \ + ParallelData(ParallelData), TaskData(TaskData), Count(Count), \ + CodeptrRA(CodeptrRA) {} \ + \ +ompt_work_t WorkType; \ +ompt_scope_endpoint_t Endpoint; \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +uint64_t Count; \ +const void *CodeptrRA; \ +) +event_class_w_custom_body(Dispatch, \ + Dispatch(ompt_data_t *ParallelData, ompt_data_t *TaskData, \ + ompt_dispatch_t Kind, ompt_data_t Instance) \ + : InternalEvent(EventTy::Dispatch), ParallelData(ParallelData), \ + TaskData(TaskData), Kind(Kind), Instance(Instance) {} \ + \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +ompt_dispatch_t Kind; \ +ompt_data_t Instance; \ +) +event_class_w_custom_body(TaskCreate, \ + TaskCreate(ompt_data_t *EncounteringTaskData, \ + const ompt_frame_t *EncounteringTaskFrame, \ + ompt_data_t *NewTaskData, int Flags, int HasDependences, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::TaskCreate), \ + EncounteringTaskData(EncounteringTaskData), \ + EncounteringTaskFrame(EncounteringTaskFrame), NewTaskData(NewTaskData), \ + Flags(Flags), HasDependences(HasDependences), CodeptrRA(CodeptrRA) {} \ + \ +ompt_data_t *EncounteringTaskData; \ +const ompt_frame_t *EncounteringTaskFrame; \ +ompt_data_t *NewTaskData; \ +int Flags; \ +int HasDependences; \ +const void *CodeptrRA; \ +) +event_class_stub(Dependences) +event_class_stub(TaskDependence) +event_class_stub(TaskSchedule) +event_class_w_custom_body(ImplicitTask, \ + ImplicitTask(ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, \ + ompt_data_t *TaskData, unsigned int ActualParallelism, \ + unsigned int Index, int Flags) \ + : InternalEvent(EventTy::ImplicitTask), Endpoint(Endpoint), \ + ParallelData(ParallelData), TaskData(TaskData), \ + ActualParallelism(ActualParallelism), Index(Index), Flags(Flags) 
{} \ + \ +ompt_scope_endpoint_t Endpoint; \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +unsigned int ActualParallelism; \ +unsigned int Index; \ +int Flags; \ +) +event_class_stub(Masked) +event_class_w_custom_body(SyncRegion, \ + SyncRegion(ompt_sync_region_t Kind, ompt_scope_endpoint_t Endpoint, \ + ompt_data_t *ParallelData, ompt_data_t *TaskData, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::SyncRegion), Kind(Kind), Endpoint(Endpoint), \ + ParallelData(ParallelData), TaskData(TaskData), CodeptrRA(CodeptrRA) {} \ + \ +ompt_sync_region_t Kind; \ +ompt_scope_endpoint_t Endpoint; \ +ompt_data_t *ParallelData; \ +ompt_data_t *TaskData; \ +const void *CodeptrRA; \ +) +event_class_stub(MutexAcquire) +event_class_stub(Mutex) +event_class_stub(NestLock) +event_class_stub(Flush) +event_class_stub(Cancel) +event_class_w_custom_body(Target, \ + Target(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, int DeviceNum, \ + ompt_data_t *TaskData, ompt_id_t TargetId, const void *CodeptrRA) \ + : InternalEvent(EventTy::Target), Kind(Kind), Endpoint(Endpoint), \ + DeviceNum(DeviceNum), TaskData(TaskData), TargetId(TargetId), \ + CodeptrRA(CodeptrRA) {} \ + \ + ompt_target_t Kind; \ + ompt_scope_endpoint_t Endpoint; \ + int DeviceNum; \ + ompt_data_t *TaskData; \ + ompt_id_t TargetId; \ + const void *CodeptrRA; \ +) +event_class_w_custom_body(TargetEmi, \ + TargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, int DeviceNum, \ + ompt_data_t *TaskData, ompt_data_t *TargetTaskData, \ + ompt_data_t *TargetData, const void *CodeptrRA) \ + : InternalEvent(EventTy::TargetEmi), Kind(Kind), Endpoint(Endpoint), \ + DeviceNum(DeviceNum), TaskData(TaskData), \ + TargetTaskData(TargetTaskData), TargetData(TargetData), \ + CodeptrRA(CodeptrRA) {} \ + \ + ompt_target_t Kind; \ + ompt_scope_endpoint_t Endpoint; \ + int DeviceNum; \ + ompt_data_t *TaskData; \ + ompt_data_t *TargetTaskData; \ + ompt_data_t *TargetData; \ + const void *CodeptrRA; \ +) 
+event_class_w_custom_body(TargetDataOp, \ + TargetDataOp(ompt_id_t TargetId, ompt_id_t HostOpId, \ + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, \ + void *DstAddr, int DstDeviceNum, size_t Bytes, \ + const void *CodeptrRA) \ + : InternalEvent(EventTy::TargetDataOp), TargetId(TargetId), \ + HostOpId(HostOpId), OpType(OpType), SrcAddr(SrcAddr), \ + SrcDeviceNum(SrcDeviceNum), DstAddr(DstAddr), \ + DstDeviceNum(DstDeviceNum), Bytes(Bytes), CodeptrRA(CodeptrRA) {} \ + \ + ompt_id_t TargetId; \ + ompt_id_t HostOpId; \ + ompt_target_data_op_t OpType; \ + void *SrcAddr; \ + int SrcDeviceNum; \ + void *DstAddr; \ + int DstDeviceNum; \ + size_t Bytes; \ + const void *CodeptrRA; \ +) +event_class_w_custom_body(TargetDataOpEmi, \ + TargetDataOpEmi(ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetTaskData, \ + ompt_data_t *TargetData, ompt_id_t *HostOpId, \ + ompt_target_data_op_t OpType, void *SrcAddr, \ + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, \ + size_t Bytes, const void *CodeptrRA) \ + : InternalEvent(EventTy::TargetDataOpEmi), Endpoint(Endpoint), \ + TargetTaskData(TargetTaskData), TargetData(TargetData), \ + HostOpId(HostOpId), OpType(OpType), SrcAddr(SrcAddr), \ + SrcDeviceNum(SrcDeviceNum), DstAddr(DstAddr), \ + DstDeviceNum(DstDeviceNum), Bytes(Bytes), CodeptrRA(CodeptrRA) {} \ + \ + ompt_scope_endpoint_t Endpoint; \ + ompt_data_t *TargetTaskData; \ + ompt_data_t *TargetData; \ + ompt_id_t *HostOpId; \ + ompt_target_data_op_t OpType; \ + void *SrcAddr; \ + int SrcDeviceNum; \ + void *DstAddr; \ + int DstDeviceNum; \ + size_t Bytes; \ + const void *CodeptrRA; \ +) +event_class_w_custom_body(TargetSubmit, \ + TargetSubmit(ompt_id_t TargetId, ompt_id_t HostOpId, \ + unsigned int RequestedNumTeams) \ + : InternalEvent(EventTy::TargetSubmit), TargetId(TargetId), \ + HostOpId(HostOpId), RequestedNumTeams(RequestedNumTeams) {} \ + \ + ompt_id_t TargetId; \ + ompt_id_t HostOpId; \ + unsigned int RequestedNumTeams; \ +) 
+event_class_w_custom_body(TargetSubmitEmi, \ + TargetSubmitEmi(ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetData, \ + ompt_id_t *HostOpId, unsigned int RequestedNumTeams) \ + : InternalEvent(EventTy::TargetSubmitEmi), Endpoint(Endpoint), \ + TargetData(TargetData), HostOpId(HostOpId), \ + RequestedNumTeams(RequestedNumTeams) {} \ + \ + ompt_scope_endpoint_t Endpoint; \ + ompt_data_t *TargetData; \ + ompt_id_t *HostOpId; \ + unsigned int RequestedNumTeams; \ +) +event_class_stub(ControlTool) +event_class_w_custom_body(DeviceInitialize, \ + DeviceInitialize(int DeviceNum, const char *Type, ompt_device_t *Device, \ + ompt_function_lookup_t LookupFn, const char *DocStr) \ + : InternalEvent(EventTy::DeviceInitialize), DeviceNum(DeviceNum), \ + Type(Type), Device(Device), LookupFn(LookupFn), DocStr(DocStr) {} \ + \ + int DeviceNum; \ + const char *Type; \ + ompt_device_t *Device; \ + ompt_function_lookup_t LookupFn; \ + const char *DocStr; \ +) +event_class_w_custom_body(DeviceFinalize, \ + DeviceFinalize(int DeviceNum) \ + : InternalEvent(EventTy::DeviceFinalize), DeviceNum(DeviceNum) {} \ + \ + int DeviceNum; \ +) +event_class_w_custom_body(DeviceLoad, \ + DeviceLoad(int DeviceNum, const char *Filename, int64_t OffsetInFile, \ + void *VmaInFile, size_t Bytes, void *HostAddr, void *DeviceAddr, \ + uint64_t ModuleId) \ + : InternalEvent(EventTy::DeviceLoad), DeviceNum(DeviceNum), \ + Filename(Filename), OffsetInFile(OffsetInFile), VmaInFile(VmaInFile), \ + Bytes(Bytes), HostAddr(HostAddr), DeviceAddr(DeviceAddr), \ + ModuleId(ModuleId) {} \ + \ + int DeviceNum; \ + const char *Filename; \ + int64_t OffsetInFile; \ + void *VmaInFile; \ + size_t Bytes; \ + void *HostAddr; \ + void *DeviceAddr; \ + uint64_t ModuleId; \ +) +event_class_stub(DeviceUnload) +event_class_w_custom_body(BufferRequest, \ + BufferRequest(int DeviceNum, ompt_buffer_t **Buffer, size_t *Bytes) \ + : InternalEvent(EventTy::BufferRequest), DeviceNum(DeviceNum), \ + Buffer(Buffer), Bytes(Bytes) {} 
\ + \ + int DeviceNum; \ + ompt_buffer_t **Buffer; \ + size_t *Bytes; \ +) +event_class_w_custom_body(BufferComplete, \ + BufferComplete(int DeviceNum, ompt_buffer_t *Buffer, size_t Bytes, \ + ompt_buffer_cursor_t Begin, int BufferOwned) \ + : InternalEvent(EventTy::BufferComplete), DeviceNum(DeviceNum), \ + Buffer(Buffer), Bytes(Bytes), Begin(Begin), BufferOwned(BufferOwned) {} \ + \ + int DeviceNum; \ + ompt_buffer_t *Buffer; \ + size_t Bytes; \ + ompt_buffer_cursor_t Begin; \ + int BufferOwned; \ +) +event_class_w_custom_body(BufferRecord, \ + BufferRecord(ompt_record_ompt_t *RecordPtr) \ + : InternalEvent(EventTy::BufferRecord), RecordPtr(RecordPtr) { \ + if (RecordPtr != nullptr) Record = *RecordPtr; \ + else memset(&Record, 0, sizeof(ompt_record_ompt_t)); \ + } \ + \ + ompt_record_ompt_t Record; \ + ompt_record_ompt_t *RecordPtr; \ +) +event_class_w_custom_body(BufferRecordDeallocation, \ + BufferRecordDeallocation(ompt_buffer_t *Buffer) \ + : InternalEvent(EventTy::BufferRecordDeallocation), Buffer(Buffer) {} \ + \ + ompt_buffer_t *Buffer; \ +) +// clang-format on + +} // namespace internal + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/InternalEventCommon.h b/openmp/tools/omptest/include/InternalEventCommon.h new file mode 100644 index 0000000000000..e48eeddd975ed --- /dev/null +++ b/openmp/tools/omptest/include/InternalEventCommon.h @@ -0,0 +1,133 @@ +//===- InternalEventCommon.h - Common internal event basics -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides event types, and class/operator declaration macros. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENTCOMMON_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENTCOMMON_H + +#include "omp-tools.h" + +#include +#include + +namespace omptest { + +namespace internal { +/// Enum values are used for comparison of observed and asserted events +/// List is based on OpenMP 5.2 specification, table 19.2 (page 447) +enum class EventTy { + None, // not part of OpenMP spec, used for implementation + AssertionSyncPoint, // not part of OpenMP spec, used for implementation + AssertionSuspend, // not part of OpenMP spec, used for implementation + BufferRecord, // not part of OpenMP spec, used for implementation + BufferRecordDeallocation, // not part of OpenMP spec, used for implementation + ThreadBegin, + ThreadEnd, + ParallelBegin, + ParallelEnd, + Work, + Dispatch, + TaskCreate, // TODO: Implement + Dependences, // TODO: Implement + TaskDependence, // TODO: Implement + TaskSchedule, // TODO: Implement + ImplicitTask, // TODO: Implement + Masked, // TODO: Implement + SyncRegion, + MutexAcquire, // TODO: Implement + Mutex, // TODO: Implement + NestLock, // TODO: Implement + Flush, // TODO: Implement + Cancel, // TODO: Implement + DeviceInitialize, + DeviceFinalize, + DeviceLoad, + DeviceUnload, + BufferRequest, + BufferComplete, + TargetDataOp, + TargetDataOpEmi, + Target, + TargetEmi, + TargetSubmit, + TargetSubmitEmi, + ControlTool +}; + +struct InternalEvent { + EventTy Type; + EventTy getType() const { return Type; } + + InternalEvent() : Type(EventTy::None) {} + InternalEvent(EventTy T) : Type(T) {} + virtual ~InternalEvent() = default; + + virtual bool equals(const InternalEvent *o) const { + assert(false && "Base class implementation"); + return false; + }; + + virtual std::string toString() const { + std::string S{"InternalEvent: Type="}; + S.append(std::to_string((uint32_t)Type)); + return S; + } +}; + +#define event_class_stub(EvTy) 
\ + struct EvTy : public InternalEvent { \ + virtual bool equals(const InternalEvent *o) const override; \ + EvTy() : InternalEvent(EventTy::EvTy) {} \ + }; + +#define event_class_w_custom_body(EvTy, ...) \ + struct EvTy : public InternalEvent { \ + virtual bool equals(const InternalEvent *o) const override; \ + std::string toString() const override; \ + __VA_ARGS__ \ + }; + +#define event_class_operator_stub(EvTy) \ + bool operator==(const EvTy &Expected, const EvTy &Observed) { return true; } + +#define event_class_operator_w_body(EvTy, ...) \ + bool operator==(const EvTy &Expected, const EvTy &Observed) { __VA_ARGS__ } + +/// Template "base" for the cast functions generated in the define_cast_func +/// macro +template const To *cast(const InternalEvent *From) { + return nullptr; +} + +/// Generates template specialization of the cast operation for the specified +/// EvTy as the template parameter +#define define_cast_func(EvTy) \ + template <> const EvTy *cast(const InternalEvent *From) { \ + if (From->getType() == EventTy::EvTy) \ + return static_cast(From); \ + return nullptr; \ + } + +/// Auto generate the equals override to cast and dispatch to the specific class +/// operator== +#define class_equals_op(EvTy) \ + bool EvTy::equals(const InternalEvent *o) const { \ + if (const auto O = cast(o)) \ + return *this == *O; \ + return false; \ + } + +} // namespace internal + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/Logging.h b/openmp/tools/omptest/include/Logging.h new file mode 100644 index 0000000000000..0104191b1d15f --- /dev/null +++ b/openmp/tools/omptest/include/Logging.h @@ -0,0 +1,155 @@ +//===- Logging.h - General logging class ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides ompTest-tailored logging, with log-levels and formatting/coloring. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_LOGGING_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_LOGGING_H + +#include "OmptAssertEvent.h" + +#include +#include +#include +#include +#include +#include + +namespace omptest { +namespace logging { + +enum class Level : uint32_t { + // Levels (Note: DEBUG may already be reserved) + DIAGNOSTIC = 10, + INFO = 20, + WARNING = 30, + ERROR = 40, + CRITICAL = 50, + + // Types used for formatting options + Default, + ExpectedEvent, + ObservedEvent, + OffendingEvent, + + // Suppress all prints + SILENT = 0xFFFFFFFF +}; + +enum class FormatOption : uint32_t { + // General options + // Note: BOLD is actually "BRIGHT" -- But it will be perceived as 'bold' font + // It is implicitly switching colors to the 'Light' variant + // Thus, it has -NO EFFECT- when already using a Light* color + NONE = 0, + BOLD = 1, + DIM = 2, + UNDERLINED = 4, + BLINK = 5, + INVERTED = 7, + HIDDEN = 8, + // Foreground colors + COLOR_Default = 39, + COLOR_Black = 30, + COLOR_Red = 31, + COLOR_Green = 32, + COLOR_Yellow = 33, + COLOR_Blue = 34, + COLOR_Magenta = 35, + COLOR_Cyan = 36, + COLOR_LightGray = 37, + COLOR_DarkGray = 90, + COLOR_LightRed = 91, + COLOR_LightGreen = 92, + COLOR_LightYellow = 93, + COLOR_LightBlue = 94, + COLOR_LightMagenta = 95, + COLOR_LightCyan = 96, + COLOR_White = 97, + // Background colors + COLOR_BG_Default = 49, + COLOR_BG_Black = 40, + COLOR_BG_Red = 41, + COLOR_BG_Green = 42, + COLOR_BG_Yellow = 43, + COLOR_BG_Blue = 44, + COLOR_BG_Magenta = 45, + COLOR_BG_Cyan = 46, + COLOR_BG_LightGray = 47, + COLOR_BG_DarkGray = 100, + COLOR_BG_LightRed = 101, + COLOR_BG_LightGreen = 102, + COLOR_BG_LightYellow = 103, + 
COLOR_BG_LightBlue = 104, + COLOR_BG_LightMagenta = 105, + COLOR_BG_LightCyan = 106, + COLOR_BG_White = 107 +}; + +/// Returns a string representation of the given logging level. +const char *to_string(Level LogLevel); + +/// Returns the format options as escaped sequence, for the given logging level +std::string getFormatSequence(Level LogLevel = Level::Default); + +/// Format the given message with the provided option(s) and return it. +/// Here formatting is only concerning control sequences using character +/// which can be obtained using '\e' (on console), '\033' or '\x1B'. +std::string format(const std::string &Message, FormatOption Option); +std::string format(const std::string &Message, std::set Options); + +class Logger { +public: + Logger(Level LogLevel = Level::WARNING, std::ostream &OutStream = std::cerr, + bool FormatOutput = true); + ~Logger(); + + /// Log the given message to the output. + void log(Level LogLevel, const std::string &Message) const; + + /// Log a single event mismatch. + void eventMismatch(const omptest::OmptAssertEvent &OffendingEvent, + const std::string &Message, + Level LogLevel = Level::ERROR) const; + + /// Log an event-pair mismatch. + void eventMismatch(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent, + const std::string &Message, + Level LogLevel = Level::ERROR) const; + + /// Set if output is being formatted (e.g. colored). + void setFormatOutput(bool Enabled); + + /// Return the current (minimum) Logging Level. + Level getLoggingLevel() const; + + /// Set the (minimum) Logging Level. + void setLoggingLevel(Level LogLevel); + +private: + /// The minimum logging level that is considered by the logger instance. + Level LoggingLevel; + + /// The output stream used by the logger instance. + std::ostream &OutStream; + + /// Determine if log messages are formatted using control sequences. 
+ bool FormatOutput; + + /// Mutex to ensure serialized logging + mutable std::mutex LogMutex; +}; + +} // namespace logging +} // namespace omptest + +#endif \ No newline at end of file diff --git a/openmp/tools/omptest/include/OmptAliases.h b/openmp/tools/omptest/include/OmptAliases.h new file mode 100644 index 0000000000000..500be5ef9f749 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAliases.h @@ -0,0 +1,85 @@ +//===- OmptAliases.h - Shorthand aliases for OMPT enum values ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines shorthand aliases for OMPT enum values, providing improved +/// ease-of-use and readability. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H + +#include + +/// Aliases for enum: ompt_scope_endpoint_t +constexpr ompt_scope_endpoint_t BEGIN = ompt_scope_begin; +constexpr ompt_scope_endpoint_t END = ompt_scope_end; +constexpr ompt_scope_endpoint_t BEGINEND = ompt_scope_beginend; + +/// Aliases for enum: ompt_target_t +constexpr ompt_target_t TARGET = ompt_target; +constexpr ompt_target_t ENTER_DATA = ompt_target_enter_data; +constexpr ompt_target_t EXIT_DATA = ompt_target_exit_data; +constexpr ompt_target_t UPDATE = ompt_target_update; +constexpr ompt_target_t TARGET_NOWAIT = ompt_target_nowait; +constexpr ompt_target_t ENTER_DATA_NOWAIT = ompt_target_enter_data_nowait; +constexpr ompt_target_t EXIT_DATA_NOWAIT = ompt_target_exit_data_nowait; +constexpr ompt_target_t UPDATE_NOWAIT = ompt_target_update_nowait; + +/// Aliases for enum: ompt_target_data_op_t +constexpr ompt_target_data_op_t ALLOC = 
ompt_target_data_alloc; +constexpr ompt_target_data_op_t H2D = ompt_target_data_transfer_to_device; +constexpr ompt_target_data_op_t D2H = ompt_target_data_transfer_from_device; +constexpr ompt_target_data_op_t DELETE = ompt_target_data_delete; +constexpr ompt_target_data_op_t ASSOCIATE = ompt_target_data_associate; +constexpr ompt_target_data_op_t DISASSOCIATE = ompt_target_data_disassociate; +constexpr ompt_target_data_op_t ALLOC_ASYNC = ompt_target_data_alloc_async; +constexpr ompt_target_data_op_t H2D_ASYNC = + ompt_target_data_transfer_to_device_async; +constexpr ompt_target_data_op_t D2H_ASYNC = + ompt_target_data_transfer_from_device_async; +constexpr ompt_target_data_op_t DELETE_ASYNC = ompt_target_data_delete_async; + +/// Aliases for enum: ompt_callbacks_t (partial) +constexpr ompt_callbacks_t CB_TARGET = ompt_callback_target; +constexpr ompt_callbacks_t CB_DATAOP = ompt_callback_target_data_op; +constexpr ompt_callbacks_t CB_KERNEL = ompt_callback_target_submit; + +/// Aliases for enum: ompt_work_t +constexpr ompt_work_t WORK_LOOP = ompt_work_loop; +constexpr ompt_work_t WORK_SECT = ompt_work_sections; +constexpr ompt_work_t WORK_EXEC = ompt_work_single_executor; +constexpr ompt_work_t WORK_SINGLE = ompt_work_single_other; +constexpr ompt_work_t WORK_SHARE = ompt_work_workshare; +constexpr ompt_work_t WORK_DIST = ompt_work_distribute; +constexpr ompt_work_t WORK_TASK = ompt_work_taskloop; +constexpr ompt_work_t WORK_SCOPE = ompt_work_scope; +constexpr ompt_work_t WORK_LOOP_STA = ompt_work_loop_static; +constexpr ompt_work_t WORK_LOOP_DYN = ompt_work_loop_dynamic; +constexpr ompt_work_t WORK_LOOP_GUI = ompt_work_loop_guided; +constexpr ompt_work_t WORK_LOOP_OTH = ompt_work_loop_other; + +/// Aliases for enum: ompt_sync_region_t +constexpr ompt_sync_region_t SR_BARRIER = ompt_sync_region_barrier; +constexpr ompt_sync_region_t SR_BARRIER_IMPL = + ompt_sync_region_barrier_implicit; +constexpr ompt_sync_region_t SR_BARRIER_EXPL = + 
ompt_sync_region_barrier_explicit; +constexpr ompt_sync_region_t SR_BARRIER_IMPLEMENTATION = + ompt_sync_region_barrier_implementation; +constexpr ompt_sync_region_t SR_TASKWAIT = ompt_sync_region_taskwait; +constexpr ompt_sync_region_t SR_TASKGROUP = ompt_sync_region_taskgroup; +constexpr ompt_sync_region_t SR_REDUCTION = ompt_sync_region_reduction; +constexpr ompt_sync_region_t SR_BARRIER_IMPL_WORKSHARE = + ompt_sync_region_barrier_implicit_workshare; +constexpr ompt_sync_region_t SR_BARRIER_IMPL_PARALLEL = + ompt_sync_region_barrier_implicit_parallel; +constexpr ompt_sync_region_t SR_BARRIER_TEAMS = ompt_sync_region_barrier_teams; + +#endif diff --git a/openmp/tools/omptest/include/OmptAssertEvent.h b/openmp/tools/omptest/include/OmptAssertEvent.h new file mode 100644 index 0000000000000..87d187c823796 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAssertEvent.h @@ -0,0 +1,377 @@ +//===- OmptAssertEvent.h - Assertion event declarations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains assertion event constructors, for generally all observable events. +/// This includes user-generated events, like synchronization. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H + +#include "InternalEvent.h" +#include "omp-tools.h" + +#include +#include +#include +#include + +namespace omptest { + +enum class ObserveState { generated, always, never }; + +/// Helper function, returning an ObserveState string representation +const char *to_string(ObserveState State); + +/// Assertion event struct, provides statically callable CTORs. 
+struct OmptAssertEvent { + static OmptAssertEvent AssertionSyncPoint(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + const std::string &SyncPointName); + + static OmptAssertEvent AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType); + + static OmptAssertEvent ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads); + + static OmptAssertEvent ParallelEnd( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *EncounteringTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + uint64_t Count = expectedDefault(uint64_t), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_dispatch_t Kind = expectedDefault(ompt_dispatch_t), + ompt_data_t Instance = expectedDefault(ompt_data_t)); + + static OmptAssertEvent + TaskCreate(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t 
*EncounteringTaskData = expectedDefault(ompt_data_t *), + const ompt_frame_t *EncounteringTaskFrame = + expectedDefault(ompt_frame_t *), + ompt_data_t *NewTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + int HasDependences = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent + ImplicitTask(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + unsigned int ActualParallelism = expectedDefault(unsigned int), + unsigned int Index = expectedDefault(unsigned int), + int Flags = expectedDefault(int)); + + static OmptAssertEvent + SyncRegion(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = 
expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_id_t TargetId, + ompt_id_t HostOpId, ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, size_t Bytes, + const void *CodeptrRA); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + size_t Bytes = expectedDefault(size_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOpEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, + ompt_id_t *HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, + int DstDeviceNum, size_t Bytes, const void *CodeptrRA); + + static OmptAssertEvent + TargetDataOpEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + ompt_scope_endpoint_t Endpoint, + size_t Bytes = expectedDefault(size_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = expectedDefault(ompt_data_t *), + ompt_id_t *HostOpId = expectedDefault(ompt_id_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent TargetSubmit(const std::string &Name, + const 
std::string &Group, + const ObserveState &Expected, + ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams); + + static OmptAssertEvent + TargetSubmit(const std::string &Name, const std::string &Group, + const ObserveState &Expected, unsigned int RequestedNumTeams, + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + static OmptAssertEvent + TargetSubmitEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams); + + static OmptAssertEvent + TargetSubmitEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, unsigned int RequestedNumTeams, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData = expectedDefault(ompt_data_t *), + ompt_id_t *HostOpId = expectedDefault(ompt_id_t *)); + + static OmptAssertEvent ControlTool(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent DeviceInitialize( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Type = expectedDefault(const char *), + ompt_device_t *Device = expectedDefault(ompt_device_t *), + ompt_function_lookup_t LookupFn = expectedDefault(ompt_function_lookup_t), + const char *DocumentationStr = expectedDefault(const char *)); + + static OmptAssertEvent DeviceFinalize(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum); + + static OmptAssertEvent + DeviceLoad(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Filename = expectedDefault(const char *), + int64_t OffsetInFile = expectedDefault(int64_t), + void *VmaInFile = expectedDefault(void *), + size_t Bytes = expectedDefault(size_t), + void *HostAddr = expectedDefault(void *), + void 
*DeviceAddr = expectedDefault(void *), + uint64_t ModuleId = expectedDefault(int64_t)); + + static OmptAssertEvent DeviceUnload(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent BufferRequest(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum, ompt_buffer_t **Buffer, + size_t *Bytes); + + static OmptAssertEvent + BufferComplete(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned); + + static OmptAssertEvent BufferRecord(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_record_ompt_t *Record); + + /// Handle type = ompt_record_target_t + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_id_t TaskId = expectedDefault(ompt_id_t), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_data_op + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + std::pair Timeframe, + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_data_op + static OmptAssertEvent BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, 
ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes = expectedDefault(size_t), + ompt_device_time_t MinimumTimeDelta = expectedDefault(ompt_device_time_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_submit + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + std::pair Timeframe, + unsigned int RequestedNumTeams = expectedDefault(unsigned int), + unsigned int GrantedNumTeams = expectedDefault(unsigned int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + /// Handle type = ompt_callback_target_submit + /// Note: This will also act as the simplest default CTOR + static OmptAssertEvent BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_device_time_t MinimumTimeDelta = expectedDefault(ompt_device_time_t), + unsigned int RequestedNumTeams = expectedDefault(unsigned int), + unsigned int GrantedNumTeams = expectedDefault(unsigned int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + static OmptAssertEvent BufferRecordDeallocation(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_buffer_t *Buffer); + + /// Allow move construction (due to std::unique_ptr) + OmptAssertEvent(OmptAssertEvent &&o) = default; + OmptAssertEvent &operator=(OmptAssertEvent &&o) = default; + + /// Get the event's name + std::string getEventName() const; + + /// Get the event's group name + std::string getEventGroup() const; + + /// Get 
the event's expected observation state + ObserveState getEventExpectedState() const; + + /// Return the actual event type enum value + internal::EventTy getEventType() const; + + /// Get a pointer to the internal event + internal::InternalEvent *getEvent() const; + + /// Make events comparable + friend bool operator==(const OmptAssertEvent &A, const OmptAssertEvent &B); + + /// Returns the string representation of the event + std::string toString(bool PrefixEventName = false) const; + +private: + OmptAssertEvent(const std::string &Name, const std::string &Group, + const ObserveState &Expected, internal::InternalEvent *IE); + OmptAssertEvent(const OmptAssertEvent &o) = delete; + + /// Determine the event name. Either it is provided directly or determined + /// from the calling function's name. + static std::string getName(const std::string &Name, + const char *Caller = __builtin_FUNCTION()) { + std::string EName = Name; + if (EName.empty()) + EName.append(Caller).append(" (auto generated)"); + + return EName; + } + + /// Determine the event group name. Either it is provided directly or "default". + static std::string getGroup(const std::string &Group) { + if (Group.empty()) + return "default"; + + return Group; + } + + std::string Name; + std::string Group; + ObserveState ExpectedState; + std::unique_ptr TheEvent; +}; + +/// POD type, which holds the target region id, corresponding to an event group. +struct AssertEventGroup { + AssertEventGroup(uint64_t TargetRegion) : TargetRegion(TargetRegion) {} + uint64_t TargetRegion; +}; + +bool operator==(const OmptAssertEvent &A, const OmptAssertEvent &B); + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/OmptAsserter.h b/openmp/tools/omptest/include/OmptAsserter.h new file mode 100644 index 0000000000000..64cbb5f3642f9 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAsserter.h @@ -0,0 +1,291 @@ +//===- OmptAsserter.h - Asserter-related classes, enums, etc. 
---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains all asserter-related class declarations and important enums. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTER_H + +#include "Logging.h" +#include "OmptAssertEvent.h" + +#include +#include +#include +#include +#include +#include + +namespace omptest { + +// Forward declaration. +class OmptEventGroupInterface; + +enum class AssertMode { strict, relaxed }; +enum class AssertState { pass, fail }; + +/// General base class for the subscriber/notification pattern in +/// OmptCallbackHandler. Derived classes need to implement the notify method. +class OmptListener { +public: + virtual ~OmptListener() = default; + + /// Called for each registered OMPT event of the OmptCallbackHandler + virtual void notify(omptest::OmptAssertEvent &&AE) = 0; + + /// Control whether this asserter should be considered 'active'. + void setActive(bool Enabled); + + /// Check if this asserter is considered 'active'. + bool isActive(); + + /// Check if the given event type is in the set of suppressed event types. + bool isSuppressedEventType(omptest::internal::EventTy EvTy); + + /// Remove the given event type from the set of suppressed events. + void permitEvent(omptest::internal::EventTy EvTy); + + /// Add the given event type to the set of suppressed events. + void suppressEvent(omptest::internal::EventTy EvTy); + +private: + bool Active{true}; + + // Add event types to the set of suppressed events by default. 
+ std::set SuppressedEvents{ + omptest::internal::EventTy::ThreadBegin, + omptest::internal::EventTy::ThreadEnd, + omptest::internal::EventTy::ParallelBegin, + omptest::internal::EventTy::ParallelEnd, + omptest::internal::EventTy::Work, + omptest::internal::EventTy::Dispatch, + omptest::internal::EventTy::TaskCreate, + omptest::internal::EventTy::Dependences, + omptest::internal::EventTy::TaskDependence, + omptest::internal::EventTy::TaskSchedule, + omptest::internal::EventTy::ImplicitTask, + omptest::internal::EventTy::Masked, + omptest::internal::EventTy::SyncRegion, + omptest::internal::EventTy::MutexAcquire, + omptest::internal::EventTy::Mutex, + omptest::internal::EventTy::NestLock, + omptest::internal::EventTy::Flush, + omptest::internal::EventTy::Cancel}; +}; + +/// Base class for asserting on OMPT events +class OmptAsserter : public OmptListener { +public: + OmptAsserter(); + virtual ~OmptAsserter() = default; + + /// Add an event to the asserter's internal data structure. + virtual void insert(omptest::OmptAssertEvent &&AE); + + /// Called from the CallbackHandler with a corresponding AssertEvent to which + /// callback was handled. + void notify(omptest::OmptAssertEvent &&AE) override; + + /// Implemented in subclasses to implement what should actually be done with + /// the notification. + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) = 0; + + /// Get the number of currently remaining events, with: ObserveState::always. + virtual size_t getRemainingEventCount() = 0; + + /// Get the total number of received, effective notifications. + int getNotificationCount() { return NumNotifications; } + + /// Get the total number of successful assertion checks. + int getSuccessfulAssertionCount() { return NumSuccessfulAsserts; } + + /// Get the asserter's current operation mode, e.g., strict or relaxed. + AssertMode getOperationMode() { return OperationMode; } + + /// Return the asserter's current state. 
+ omptest::AssertState getState() { return State; } + + /// Determine and return the asserter's state. + virtual omptest::AssertState checkState(); + + /// Accessor for the event group interface. + std::shared_ptr getEventGroups() const { + return EventGroups; + } + + /// Accessor for the logging instance. + std::shared_ptr getLog() const { return Log; } + + /// Check the observed events' group association. If the event indicates the + /// begin/end of an OpenMP target region, we will create/deprecate the + /// expected event's group. Return true if the expected event group exists + /// (and is active), otherwise: false. Note: BufferRecords may also match with + /// deprecated groups as they may be delivered asynchronously. + bool verifyEventGroups(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent); + + /// Set the asserter's mode of operation w.r.t. assertion. + void setOperationMode(AssertMode Mode); + + /// The total number of effective notifications. For example, if specific + /// notifications are to be ignored, they will not count towards this total. + int NumNotifications{0}; + + /// The number of successful assertion checks. + int NumSuccessfulAsserts{0}; + +protected: + /// The asserter's current state. + omptest::AssertState State{omptest::AssertState::pass}; + + /// Mutex to avoid data races w.r.t. event notifications and/or insertions. + std::mutex AssertMutex; + + /// Pointer to the OmptEventGroupInterface. + std::shared_ptr EventGroups{nullptr}; + + /// Pointer to the logging instance. + std::shared_ptr Log{nullptr}; + + /// Operation mode during assertion / notification. 
+ AssertMode OperationMode{AssertMode::strict}; + +private: + /// Mutex for creating/accessing the singleton members + static std::mutex StaticMemberAccessMutex; + + /// Static member to manage the singleton event group interface instance + static std::weak_ptr EventGroupInterfaceInstance; + + /// Static member to manage the singleton logging instance + static std::weak_ptr LoggingInstance; +}; + +/// Class that can assert in a sequenced fashion, i.e., events have to occur in +/// the order they were registered +class OmptSequencedAsserter : public OmptAsserter { +public: + OmptSequencedAsserter() : OmptAsserter(), NextEvent(0) {} + + /// Add the event to the in-sequence set of events that the asserter should + /// check for. + void insert(omptest::OmptAssertEvent &&AE) override; + + /// Implements the asserter's actual logic + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) override; + + size_t getRemainingEventCount() override; + + omptest::AssertState checkState() override; + + bool AssertionSuspended{false}; + +protected: + /// Notification helper function, implementing SyncPoint logic. Returns true + /// in case of consumed event, indicating early exit of notification. + bool consumeSyncPoint(const omptest::OmptAssertEvent &AE); + + /// Notification helper function, implementing excess event notification + /// logic. Returns true when no more events were expected, indicating early + /// exit of notification. + bool checkExcessNotify(const omptest::OmptAssertEvent &AE); + + /// Notification helper function, implementing Suspend logic. Returns true + /// in case of consumed event, indicating early exit of notification. + bool consumeSuspend(); + + /// Notification helper function, implementing regular event notification + /// logic. Returns true when a matching event was encountered, indicating + /// early exit of notification. + bool consumeRegularEvent(const omptest::OmptAssertEvent &AE); + +public: + /// Index of the next, expected event. 
+ size_t NextEvent{0}; + std::vector Events{}; +}; + +/// Class that asserts with set semantics, i.e., unordered +struct OmptEventAsserter : public OmptAsserter { + OmptEventAsserter() : OmptAsserter(), NumEvents(0), Events() {} + + /// Add the event to the set of events that the asserter should check for. + void insert(omptest::OmptAssertEvent &&AE) override; + + /// Implements the asserter's logic + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) override; + + size_t getRemainingEventCount() override; + + omptest::AssertState checkState() override; + + size_t NumEvents{0}; + + /// For now use vector (but do set semantics) + // TODO std::unordered_set? + std::vector Events{}; +}; + +/// Class that reports the occurred events +class OmptEventReporter : public OmptListener { +public: + OmptEventReporter(std::ostream &OutStream = std::cout) + : OutStream(OutStream) {} + + /// Called from the CallbackHandler with a corresponding AssertEvent to which + /// callback was handled. + void notify(omptest::OmptAssertEvent &&AE) override; + +private: + std::ostream &OutStream; +}; + +/// This class provides the members and methods to manage event groups and +/// SyncPoints in conjunction with asserters. Most importantly it maintains a +/// coherent view of active and past events or SyncPoints. +class OmptEventGroupInterface { +public: + OmptEventGroupInterface() = default; + ~OmptEventGroupInterface() = default; + + /// Non-copyable and non-movable + OmptEventGroupInterface(const OmptEventGroupInterface &) = delete; + OmptEventGroupInterface &operator=(const OmptEventGroupInterface &) = delete; + OmptEventGroupInterface(OmptEventGroupInterface &&) = delete; + OmptEventGroupInterface &operator=(OmptEventGroupInterface &&) = delete; + + /// Add given group to the set of active event groups. Effectively connecting + /// the given groupname (expected) with a target region id (observed). 
+ bool addActiveEventGroup(const std::string &GroupName, + omptest::AssertEventGroup Group); + + /// Move given group from the set of active event groups to the set of + /// previously active event groups. + bool deprecateActiveEventGroup(const std::string &GroupName); + + /// Check if given group is currently part of the active event groups. + bool checkActiveEventGroups(const std::string &GroupName, + omptest::AssertEventGroup Group); + + /// Check if given group is currently part of the deprecated event groups. + bool checkDeprecatedEventGroups(const std::string &GroupName, + omptest::AssertEventGroup Group); + +private: + mutable std::mutex GroupMutex; + std::map ActiveEventGroups{}; + std::map DeprecatedEventGroups{}; + std::set EncounteredSyncPoints{}; +}; + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/OmptCallbackHandler.h b/openmp/tools/omptest/include/OmptCallbackHandler.h new file mode 100644 index 0000000000000..40076c386107e --- /dev/null +++ b/openmp/tools/omptest/include/OmptCallbackHandler.h @@ -0,0 +1,165 @@ +//===- OmptCallbackHandler.h - Callback reception and handling --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides the OMPT callback handling declarations. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTCALLBACKHANDLER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTCALLBACKHANDLER_H + +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" + +#include "omp-tools.h" + +#include + +namespace omptest { + +/// Handler class to do whatever is needed to be done when a callback is invoked +/// by the OMP runtime +/// Supports a RecordAndReplay mechanism in which all OMPT events are recorded +/// and then replayed. This is so that a test can assert on, e.g., a device +/// initialize event, even though this would occur before a unit test is +/// actually executed. +class OmptCallbackHandler { +public: + ~OmptCallbackHandler() = default; + + /// Singleton handler + static OmptCallbackHandler &get(); + + /// Subscribe a listener to be notified for OMPT events + void subscribe(OmptListener *Listener); + + /// Remove all subscribers + void clearSubscribers(); + + /// When the record and replay mechanism is enabled this replays all OMPT + /// events + void replay(); + + /// Special asserter callback which checks that upon encountering the + /// synchronization point, all expected events have been processed. That is: + /// there are currently no remaining expected events for any asserter. 
+ void handleAssertionSyncPoint(const std::string &SyncPointName); + + void handleThreadBegin(ompt_thread_t ThreadType, ompt_data_t *ThreadData); + + void handleThreadEnd(ompt_data_t *ThreadData); + + void handleTaskCreate(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *NewTaskData, int Flags, int HasDependences, + const void *CodeptrRA); + + void handleTaskSchedule(ompt_data_t *PriorTaskData, + ompt_task_status_t PriorTaskStatus, + ompt_data_t *NextTaskData); + + void handleImplicitTask(ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + unsigned int ActualParallelism, unsigned int Index, + int Flags); + + void handleParallelBegin(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *ParallelData, + unsigned int RequestedParallelism, int Flags, + const void *CodeptrRA); + + void handleParallelEnd(ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, int Flags, + const void *CodeptrRA); + + void handleDeviceInitialize(int DeviceNum, const char *Type, + ompt_device_t *Device, + ompt_function_lookup_t LookupFn, + const char *DocumentationStr); + + void handleDeviceFinalize(int DeviceNum); + + void handleTarget(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, ompt_id_t TargetId, + const void *CodeptrRA); + + void handleTargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, + const void *CodeptrRA); + + void handleTargetSubmit(ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams); + + void handleTargetSubmitEmi(ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams); + + void handleTargetDataOp(ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int 
DstDeviceNum, + size_t Bytes, const void *CodeptrRA); + + void handleTargetDataOpEmi(ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA); + + void handleDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, uint64_t ModuleId); + + void handleDeviceUnload(int DeviceNum, uint64_t ModuleId); + + void handleBufferRequest(int DeviceNum, ompt_buffer_t **Buffer, + size_t *Bytes); + + void handleBufferComplete(int DeviceNum, ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned); + + void handleBufferRecord(ompt_record_ompt_t *Record); + + void handleBufferRecordDeallocation(ompt_buffer_t *Buffer); + + /// Not needed for a conforming minimal OMPT implementation + void handleWork(ompt_work_t WorkType, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + uint64_t Count, const void *CodeptrRA); + + void handleDispatch(ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance); + + void handleSyncRegion(ompt_sync_region_t Kind, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + const void *CodeptrRA); + +private: + /// Wrapper around emplace_back for potential additional logging / checking or + /// so + void recordEvent(OmptAssertEvent &&Event); + + /// Listeners to be notified + std::vector Subscribers; + + /// Toggle if OMPT events should notify subscribers immediately or not + bool RecordAndReplay{false}; + + /// Recorded events in Record and Replay mode + std::vector RecordedEvents; +}; + +} // namespace omptest + +// Pointer to global callback handler +extern omptest::OmptCallbackHandler *Handler; + +#endif diff --git a/openmp/tools/omptest/include/OmptTester.h 
b/openmp/tools/omptest/include/OmptTester.h new file mode 100644 index 0000000000000..155e61d5f7482 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTester.h @@ -0,0 +1,60 @@ +//===- OmptTester.h - Main header for ompTest usage -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the main header file for usage of the ompTest library. +/// Depending on the build either 'standalone' or GoogleTest headers are +/// included and corresponding main-function macros are defined. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTER_H + +#include "AssertMacros.h" +#include "Logging.h" +#include "OmptAliases.h" +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptCallbackHandler.h" + +#include +#include +#include +#include +#include +#include +#include + +// Standalone header section +#ifdef OPENMP_LIBOMPTEST_BUILD_STANDALONE + +#include "OmptTesterStandalone.h" + +// Define standalone main function (place once at the bottom of a testsuite) +#define OMPTEST_TESTSUITE_MAIN() \ + int main(int argc, char **argv) { \ + Runner R; \ + return R.run(); \ + } + +// GoogleTest header section +#else + +#include "OmptTesterGoogleTest.h" + +// Define GoogleTest main function (place once at the bottom of a testsuite) +#define OMPTEST_TESTSUITE_MAIN() \ + int main(int argc, char **argv) { \ + testing::InitGoogleTest(&argc, argv); \ + return RUN_ALL_TESTS(); \ + } + +#endif + +#endif diff --git a/openmp/tools/omptest/include/OmptTesterGlobals.h b/openmp/tools/omptest/include/OmptTesterGlobals.h new file mode 100644 index 
0000000000000..62f443aed80e0 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterGlobals.h @@ -0,0 +1,36 @@ +//===- OmptTesterGlobals.h - Global function declarations -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains global function declarations, esp. for OMPT symbols. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGLOBALS_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGLOBALS_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version); +int start_trace(ompt_device_t *Device); +int flush_trace(ompt_device_t *Device); +// Function which calls flush_trace(ompt_device_t *) on all traced devices. +int flush_traced_devices(); +int stop_trace(ompt_device_t *Device); +// Function which calls stop_trace(ompt_device_t *) on all traced devices. +int stop_trace_devices(); +void libomptest_global_eventreporter_set_active(bool State); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/openmp/tools/omptest/include/OmptTesterGoogleTest.h b/openmp/tools/omptest/include/OmptTesterGoogleTest.h new file mode 100644 index 0000000000000..51b94bc678f50 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterGoogleTest.h @@ -0,0 +1,86 @@ +//===- OmptTesterGoogleTest.h - GoogleTest header variant -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the GoogleTest-based header variant, defining the +/// actual test classes and their behavior. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGOOGLETEST_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGOOGLETEST_H + +#include "AssertMacros.h" +#include "OmptAliases.h" +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptCallbackHandler.h" +#include "OmptTesterGlobals.h" + +// This will allow us to override the "TEST" macro of gtest +#define GTEST_DONT_DEFINE_TEST 1 +#include "gtest/gtest.h" + +namespace testing { +class GTEST_API_ OmptTestCase : public testing::Test, + public omptest::OmptEventGroupInterface { +public: + std::unique_ptr SequenceAsserter = + std::make_unique(); + std::unique_ptr SetAsserter = + std::make_unique(); + std::unique_ptr EventReporter = + std::make_unique(); + +protected: + void SetUp() override { + omptest::OmptCallbackHandler::get().subscribe(SequenceAsserter.get()); + omptest::OmptCallbackHandler::get().subscribe(SetAsserter.get()); + omptest::OmptCallbackHandler::get().subscribe(EventReporter.get()); + } + + void TearDown() override { + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // Remove subscribers to not be notified of events after test execution. + omptest::OmptCallbackHandler::get().clearSubscribers(); + + // This common testcase must not encounter any failures. 
+ if (SequenceAsserter->checkState() == omptest::AssertState::fail || + SetAsserter->checkState() == omptest::AssertState::fail) + ADD_FAILURE(); + } +}; + +class GTEST_API_ OmptTestCaseXFail : public testing::OmptTestCase { +protected: + void TearDown() override { + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // Remove subscribers to not be notified of events after test execution. + omptest::OmptCallbackHandler::get().clearSubscribers(); + + // This eXpectedly failing testcase has to encounter at least one failure. + if (SequenceAsserter->checkState() == omptest::AssertState::pass && + SetAsserter->checkState() == omptest::AssertState::pass) + ADD_FAILURE(); + } +}; +} // namespace testing + +#define TEST(test_suite_name, test_name) \ + GTEST_TEST_(test_suite_name, test_name, ::testing::OmptTestCase, \ + ::testing::internal::GetTypeId<::testing::OmptTestCase>()) + +#define TEST_XFAIL(test_suite_name, test_name) \ + GTEST_TEST_(test_suite_name, test_name, ::testing::OmptTestCaseXFail, \ + ::testing::internal::GetTypeId<::testing::OmptTestCaseXFail>()) + +#endif // include guard diff --git a/openmp/tools/omptest/include/OmptTesterStandalone.h b/openmp/tools/omptest/include/OmptTesterStandalone.h new file mode 100644 index 0000000000000..06649031c5d1c --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterStandalone.h @@ -0,0 +1,123 @@ +//===- OmptTesterStandalone.h - Standalone header variant -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the 'standalone' header variant, defining the actual +/// test classes and their behavior (it does not have external dependencies). 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERSTANDALONE_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERSTANDALONE_H + +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptTesterGlobals.h" + +#include +#include + +// Forward declarations. +namespace omptest { +struct OmptEventAsserter; +class OmptEventReporter; +class OmptSequencedAsserter; +} // namespace omptest + +struct Error { + operator bool() { return Fail; } + bool Fail; +}; + +/// A pretty crude test case abstraction +struct TestCase { + TestCase(const std::string &name) + : IsDisabled(name.rfind("DISABLED_", 0) == 0), Name(name) {} + TestCase(const std::string &name, const omptest::AssertState &expected) + : IsDisabled(name.rfind("DISABLED_", 0) == 0), Name(name), + ExpectedState(expected) {} + virtual ~TestCase() = default; + Error exec(); + virtual void execImpl() { assert(false && "Allocating base class"); } + + bool IsDisabled{false}; + std::string Name; + omptest::AssertState ExpectedState{omptest::AssertState::pass}; + omptest::AssertState ResultState{omptest::AssertState::pass}; + + std::unique_ptr SequenceAsserter = + std::make_unique(); + std::unique_ptr SetAsserter = + std::make_unique(); + std::unique_ptr EventReporter = + std::make_unique(); +}; +/// A pretty crude test suite abstraction +struct TestSuite { + using TestCaseVec = std::vector>; + std::string Name; + TestSuite() = default; + TestSuite(const TestSuite &O) = delete; + TestSuite(TestSuite &&O); + void setup(); + void teardown(); + TestCaseVec::iterator begin(); + TestCaseVec::iterator end(); + TestCaseVec TestCases; +}; +/// Static class used to register all test cases and provide them to the driver +class TestRegistrar { +public: + static TestRegistrar &get(); + static std::vector getTestSuites(); + static void addCaseToSuite(TestCase *TC, std::string TSName); + +private: + TestRegistrar() = default; + 
TestRegistrar(const TestRegistrar &o) = delete; + TestRegistrar operator=(const TestRegistrar &o) = delete; + // Keep tests in order 'of appearance' (top -> bottom), avoid unordered_map + static std::map Tests; +}; +/// Hack to register test cases +struct Registerer { + Registerer(TestCase *TC, const std::string SuiteName); +}; +/// Eventually executes all test suites and cases, should contain logic to skip +/// stuff if needed +struct Runner { + Runner() : TestSuites(TestRegistrar::get().getTestSuites()) {} + int run(); + void reportError(const Error &Err); + void abortOrKeepGoing(); + // Print an execution summary of all testsuites and their corresponding + // testcases. + void printSummary(); + std::vector TestSuites; +}; + +/// MACROS TO DEFINE A TESTSUITE + TESTCASE (like GoogleTest does) +#define XQUOTE(str) QUOTE(str) +#define QUOTE(str) #str + +#define TEST_TEMPLATE(SuiteName, CaseName, ExpectedState) \ + struct SuiteName##_##CaseName : public TestCase { \ + SuiteName##_##CaseName() \ + : TestCase(XQUOTE(CaseName), omptest::AssertState::ExpectedState) {} \ + virtual void execImpl() override; \ + }; \ + static Registerer R_##SuiteName##CaseName(new SuiteName##_##CaseName(), \ + #SuiteName); \ + void SuiteName##_##CaseName::execImpl() + +#define TEST(SuiteName, CaseName) \ + TEST_TEMPLATE(SuiteName, CaseName, /*ExpectedState=*/pass) +#define TEST_XFAIL(SuiteName, CaseName) \ + TEST_TEMPLATE(SuiteName, CaseName, /*ExpectedState=*/fail) + +#endif diff --git a/openmp/tools/omptest/src/InternalEvent.cpp b/openmp/tools/omptest/src/InternalEvent.cpp new file mode 100644 index 0000000000000..87daf5a6a31ba --- /dev/null +++ b/openmp/tools/omptest/src/InternalEvent.cpp @@ -0,0 +1,367 @@ +//===- InternalEvent.cpp - Internal event implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements internal event representation methods and helper functions. +/// +//===----------------------------------------------------------------------===// + +#include "InternalEvent.h" + +#include <iomanip> +#include <sstream> + +using namespace omptest; +using namespace util; + +std::string util::makeHexString(uint64_t Data, bool IsPointer, size_t MinBytes, + bool ShowHexBase) { + if (Data == 0 && IsPointer) + return "(nil)"; + + thread_local std::ostringstream os; + // Clear the content of the stream + os.str(std::string()); + + // Manually prefixing "0x" will make the use of std::setfill easier + if (ShowHexBase) + os << "0x"; + + // Default to 32bit (8 hex digits) width, if exceeding 64bit or zero value + size_t NumDigits = (MinBytes > 0 && MinBytes < 9) ? (MinBytes << 1) : 8; + + if (MinBytes > 0) + os << std::setfill('0') << std::setw(NumDigits); + + os << std::hex << Data; + return os.str(); +} + +std::string internal::AssertionSyncPoint::toString() const { + std::string S{"Assertion SyncPoint: '"}; + S.append(Name).append(1, '\''); + return S; +} + +std::string internal::ThreadBegin::toString() const { + std::string S{"OMPT Callback ThreadBegin: "}; + S.append("ThreadType=").append(std::to_string(ThreadType)); + return S; +} + +std::string internal::ThreadEnd::toString() const { + std::string S{"OMPT Callback ThreadEnd"}; + return S; +} + +std::string internal::ParallelBegin::toString() const { + std::string S{"OMPT Callback ParallelBegin: "}; + S.append("NumThreads=").append(std::to_string(NumThreads)); + return S; +} + +std::string internal::ParallelEnd::toString() const { + // TODO: Should we expose more detailed info here? 
+ std::string S{"OMPT Callback ParallelEnd"}; + return S; +} + +std::string internal::Work::toString() const { + std::string S{"OMPT Callback Work: "}; + S.append("work_type=").append(std::to_string(WorkType)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" count=").append(std::to_string(Count)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::Dispatch::toString() const { + std::string S{"OMPT Callback Dispatch: "}; + S.append("parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" kind=").append(std::to_string(Kind)); + // TODO Check what to print for instance in all different cases + if (Kind == ompt_dispatch_iteration) { + S.append(" instance=[it=") + .append(std::to_string(Instance.value)) + .append(1, ']'); + } else if (Kind == ompt_dispatch_section) { + S.append(" instance=[ptr=") + .append(makeHexString((uint64_t)Instance.ptr)) + .append(1, ']'); + } else if ((Kind == ompt_dispatch_ws_loop_chunk || + Kind == ompt_dispatch_taskloop_chunk || + Kind == ompt_dispatch_distribute_chunk) && + Instance.ptr != nullptr) { + auto Chunk = static_cast(Instance.ptr); + S.append(" instance=[chunk=(start=") + .append(std::to_string(Chunk->start)) + .append(", iterations=") + .append(std::to_string(Chunk->iterations)) + .append(")]"); + } + return S; +} + +std::string internal::TaskCreate::toString() const { + std::string S{"OMPT Callback TaskCreate: "}; + S.append("encountering_task_data=") + .append(makeHexString((uint64_t)EncounteringTaskData)); + S.append(" encountering_task_frame=") + .append(makeHexString((uint64_t)EncounteringTaskFrame)); + S.append(" new_task_data=").append(makeHexString((uint64_t)NewTaskData)); + S.append(" 
flags=").append(std::to_string(Flags)); + S.append(" has_dependences=").append(std::to_string(HasDependences)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::ImplicitTask::toString() const { + std::string S{"OMPT Callback ImplicitTask: "}; + S.append("endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" actual_parallelism=").append(std::to_string(ActualParallelism)); + S.append(" index=").append(std::to_string(Index)); + S.append(" flags=").append(std::to_string(Flags)); + return S; +} + +std::string internal::SyncRegion::toString() const { + std::string S{"OMPT Callback SyncRegion: "}; + S.append("kind=").append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::Target::toString() const { + // TODO Should we canonicalize the string prefix (use "OMPT ..." everywhere)? + std::string S{"Callback Target: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" kind=").append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" device_num=").append(std::to_string(DeviceNum)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetEmi::toString() const { + // TODO Should we canonicalize the string prefix (use "OMPT ..." everywhere)? 
+ std::string S{"Callback Target EMI: kind="}; + S.append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" device_num=").append(std::to_string(DeviceNum)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" (") + .append(makeHexString((uint64_t)(TaskData) ? TaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_task_data=") + .append(makeHexString((uint64_t)TargetTaskData)); + S.append(" (") + .append( + makeHexString((uint64_t)(TargetTaskData) ? TargetTaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetDataOp::toString() const { + std::string S{" Callback DataOp: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" host_op_id=").append(std::to_string(HostOpId)); + S.append(" optype=").append(std::to_string(OpType)); + S.append(" src=").append(makeHexString((uint64_t)SrcAddr)); + S.append(" src_device_num=").append(std::to_string(SrcDeviceNum)); + S.append(" dest=").append(makeHexString((uint64_t)DstAddr)); + S.append(" dest_device_num=").append(std::to_string(DstDeviceNum)); + S.append(" bytes=").append(std::to_string(Bytes)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetDataOpEmi::toString() const { + std::string S{" Callback DataOp EMI: endpoint="}; + S.append(std::to_string(Endpoint)); + S.append(" optype=").append(std::to_string(OpType)); + S.append(" target_task_data=") + .append(makeHexString((uint64_t)TargetTaskData)); + S.append(" (") + .append( + makeHexString((uint64_t)(TargetTaskData) ? 
TargetTaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" host_op_id=").append(makeHexString((uint64_t)HostOpId)); + S.append(" (") + .append(makeHexString((uint64_t)(HostOpId) ? (*HostOpId) : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" src=").append(makeHexString((uint64_t)SrcAddr)); + S.append(" src_device_num=").append(std::to_string(SrcDeviceNum)); + S.append(" dest=").append(makeHexString((uint64_t)DstAddr)); + S.append(" dest_device_num=").append(std::to_string(DstDeviceNum)); + S.append(" bytes=").append(std::to_string(Bytes)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetSubmit::toString() const { + std::string S{" Callback Submit: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" host_op_id=").append(std::to_string(HostOpId)); + S.append(" req_num_teams=").append(std::to_string(RequestedNumTeams)); + return S; +} + +std::string internal::TargetSubmitEmi::toString() const { + std::string S{" Callback Submit EMI: endpoint="}; + S.append(std::to_string(Endpoint)); + S.append(" req_num_teams=").append(std::to_string(RequestedNumTeams)); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" host_op_id=").append(makeHexString((uint64_t)HostOpId)); + S.append(" (") + .append(makeHexString((uint64_t)(HostOpId) ? (*HostOpId) : 0, + /*IsPointer=*/false)) + .append(1, ')'); + return S; +} + +std::string internal::DeviceInitialize::toString() const { + std::string S{"Callback Init: device_num="}; + S.append(std::to_string(DeviceNum)); + S.append(" type=").append((Type) ? 
Type : "(null)"); + S.append(" device=").append(makeHexString((uint64_t)Device)); + S.append(" lookup=").append(makeHexString((uint64_t)LookupFn)); + S.append(" doc=").append(makeHexString((uint64_t)DocStr)); + return S; +} + +std::string internal::DeviceFinalize::toString() const { + std::string S{"Callback Fini: device_num="}; + S.append(std::to_string(DeviceNum)); + return S; +} + +std::string internal::DeviceLoad::toString() const { + std::string S{"Callback Load: device_num:"}; + S.append(std::to_string(DeviceNum)); + S.append(" module_id:").append(std::to_string(ModuleId)); + S.append(" filename:").append((Filename == nullptr) ? "(null)" : Filename); + S.append(" host_adddr:").append(makeHexString((uint64_t)HostAddr)); + S.append(" device_addr:").append(makeHexString((uint64_t)DeviceAddr)); + S.append(" bytes:").append(std::to_string(Bytes)); + return S; +} + +std::string internal::BufferRequest::toString() const { + std::string S{"Allocated "}; + S.append(std::to_string((Bytes != nullptr) ? *Bytes : 0)) + .append(" bytes at "); + S.append(makeHexString((Buffer != nullptr) ? 
(uint64_t)*Buffer : 0)); + S.append(" in buffer request callback"); + return S; +} + +std::string internal::BufferComplete::toString() const { + std::string S{"Executing buffer complete callback: "}; + S.append(std::to_string(DeviceNum)).append(1, ' '); + S.append(makeHexString((uint64_t)Buffer)).append(1, ' '); + S.append(std::to_string(Bytes)).append(1, ' '); + S.append(makeHexString((uint64_t)Begin)).append(1, ' '); + S.append(std::to_string(BufferOwned)); + return S; +} + +std::string internal::BufferRecord::toString() const { + std::string S{""}; + std::string T{""}; + S.append("rec=").append(makeHexString((uint64_t)RecordPtr)); + S.append(" type=").append(std::to_string(Record.type)); + + T.append("time=").append(std::to_string(Record.time)); + T.append(" thread_id=").append(std::to_string(Record.thread_id)); + T.append(" target_id=").append(std::to_string(Record.target_id)); + + switch (Record.type) { + case ompt_callback_target: + case ompt_callback_target_emi: { + // Handle Target Record + ompt_record_target_t TR = Record.record.target; + S.append(" (Target task) ").append(T); + S.append(" kind=").append(std::to_string(TR.kind)); + S.append(" endpoint=").append(std::to_string(TR.endpoint)); + S.append(" device=").append(std::to_string(TR.device_num)); + S.append(" task_id=").append(std::to_string(TR.task_id)); + S.append(" codeptr=").append(makeHexString((uint64_t)TR.codeptr_ra)); + break; + } + case ompt_callback_target_data_op: + case ompt_callback_target_data_op_emi: { + // Handle Target DataOp Record + ompt_record_target_data_op_t TDR = Record.record.target_data_op; + S.append(" (Target data op) ").append(T); + S.append(" host_op_id=").append(std::to_string(TDR.host_op_id)); + S.append(" optype=").append(std::to_string(TDR.optype)); + S.append(" src_addr=").append(makeHexString((uint64_t)TDR.src_addr)); + S.append(" src_device=").append(std::to_string(TDR.src_device_num)); + S.append(" dest_addr=").append(makeHexString((uint64_t)TDR.dest_addr)); + 
S.append(" dest_device=").append(std::to_string(TDR.dest_device_num)); + S.append(" bytes=").append(std::to_string(TDR.bytes)); + S.append(" end_time=").append(std::to_string(TDR.end_time)); + S.append(" duration=").append(std::to_string(TDR.end_time - Record.time)); + S.append(" ns codeptr=").append(makeHexString((uint64_t)TDR.codeptr_ra)); + break; + } + case ompt_callback_target_submit: + case ompt_callback_target_submit_emi: { + // Handle Target Kernel Record + ompt_record_target_kernel_t TKR = Record.record.target_kernel; + S.append(" (Target kernel) ").append(T); + S.append(" host_op_id=").append(std::to_string(TKR.host_op_id)); + S.append(" requested_num_teams=") + .append(std::to_string(TKR.requested_num_teams)); + S.append(" granted_num_teams=") + .append(std::to_string(TKR.granted_num_teams)); + S.append(" end_time=").append(std::to_string(TKR.end_time)); + S.append(" duration=").append(std::to_string(TKR.end_time - Record.time)); + S.append(" ns"); + break; + } + default: + S.append(" (unsupported record type)"); + break; + } + + return S; +} + +std::string internal::BufferRecordDeallocation::toString() const { + std::string S{"Deallocated "}; + S.append(makeHexString((uint64_t)Buffer)); + return S; +} diff --git a/openmp/tools/omptest/src/InternalEventOperators.cpp b/openmp/tools/omptest/src/InternalEventOperators.cpp new file mode 100644 index 0000000000000..49c61a44a7aba --- /dev/null +++ b/openmp/tools/omptest/src/InternalEventOperators.cpp @@ -0,0 +1,366 @@ +//===- InternalEventOperators.cpp - Operator implementations ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the internal event operators, like comparators. 
+/// +//===----------------------------------------------------------------------===// + +#include "InternalEvent.h" + +namespace omptest { + +namespace internal { +// clang-format off +event_class_operator_stub(AssertionSyncPoint) +event_class_operator_stub(AssertionSuspend) +event_class_operator_stub(ThreadBegin) +event_class_operator_stub(ThreadEnd) +event_class_operator_w_body(ParallelBegin, \ + return Expected.NumThreads == Observed.NumThreads; \ +) +event_class_operator_stub(ParallelEnd) +event_class_operator_w_body(Work, \ + bool isSameWorkType = (Expected.WorkType == Observed.WorkType); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameParallelData = \ + (Expected.ParallelData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.ParallelData == Observed.ParallelData); \ + bool isSameTaskData = \ + (Expected.TaskData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.TaskData == Observed.TaskData); \ + bool isSameCount = (Expected.Count == expectedDefault(uint64_t)) ? \ + true : (Expected.Count == Observed.Count); \ + return isSameWorkType && isSameEndpoint && isSameParallelData && \ + isSameTaskData && isSameCount; \ +) +event_class_operator_stub(Dispatch) +event_class_operator_stub(TaskCreate) +event_class_operator_stub(Dependences) +event_class_operator_stub(TaskDependence) +event_class_operator_stub(TaskSchedule) +event_class_operator_w_body(ImplicitTask, \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameActualParallelism = \ + (Expected.ActualParallelism == expectedDefault(unsigned int)) ? \ + true : (Expected.ActualParallelism == Observed.ActualParallelism); \ + bool isSameIndex = (Expected.Index == expectedDefault(unsigned int)) ? 
\ + true : ( Expected.Index == Observed.Index); \ + return isSameEndpoint && isSameActualParallelism && isSameIndex; \ +) +event_class_operator_stub(Masked) +event_class_operator_w_body(SyncRegion, \ + bool isSameKind = (Expected.Kind == Observed.Kind); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameParallelData = \ + (Expected.ParallelData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.ParallelData == Observed.ParallelData); \ + bool isSameTaskData = \ + (Expected.TaskData == expectedDefault(ompt_data_t *)) ? \ + true : (Expected.TaskData == Observed.TaskData); \ + return isSameKind && isSameEndpoint && isSameParallelData && isSameTaskData; \ +) +event_class_operator_stub(MutexAcquire) +event_class_operator_stub(Mutex) +event_class_operator_stub(NestLock) +event_class_operator_stub(Flush) +event_class_operator_stub(Cancel) +event_class_operator_w_body(Target, \ + bool isSameKind = (Expected.Kind == Observed.Kind); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + return isSameKind && isSameEndpoint && isSameDeviceNum; \ +) +event_class_operator_w_body(TargetEmi, \ + bool isSameKind = (Expected.Kind == Observed.Kind); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + return isSameKind && isSameEndpoint && isSameDeviceNum; \ +) +event_class_operator_w_body(TargetDataOp, \ + bool isSameOpType = (Expected.OpType == Observed.OpType); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + bool isSameSrcAddr = (Expected.SrcAddr == expectedDefault(void *)) ? 
\ + true : (Expected.SrcAddr == Observed.SrcAddr); \ + bool isSameDstAddr = (Expected.DstAddr == expectedDefault(void *)) ? \ + true : (Expected.DstAddr == Observed.DstAddr); \ + bool isSameSrcDeviceNum = \ + (Expected.SrcDeviceNum == expectedDefault(int)) ? \ + true : (Expected.SrcDeviceNum == Observed.SrcDeviceNum); \ + bool isSameDstDeviceNum = \ + (Expected.DstDeviceNum == expectedDefault(int)) ? \ + true : (Expected.DstDeviceNum == Observed.DstDeviceNum); \ + return isSameOpType && isSameSize && isSameSrcAddr && isSameDstAddr && \ + isSameSrcDeviceNum && isSameDstDeviceNum; \ +) +event_class_operator_w_body(TargetDataOpEmi, \ + bool isSameOpType = (Expected.OpType == Observed.OpType); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + bool isSameSrcAddr = (Expected.SrcAddr == expectedDefault(void *)) ? \ + true : (Expected.SrcAddr == Observed.SrcAddr); \ + bool isSameDstAddr = (Expected.DstAddr == expectedDefault(void *)) ? \ + true : (Expected.DstAddr == Observed.DstAddr); \ + bool isSameSrcDeviceNum = \ + (Expected.SrcDeviceNum == expectedDefault(int)) ? \ + true : (Expected.SrcDeviceNum == Observed.SrcDeviceNum); \ + bool isSameDstDeviceNum = \ + (Expected.DstDeviceNum == expectedDefault(int)) ? 
\ + true : (Expected.DstDeviceNum == Observed.DstDeviceNum); \ + return isSameOpType && isSameEndpoint && isSameSize && isSameSrcAddr && \ + isSameDstAddr && isSameSrcDeviceNum && isSameDstDeviceNum; \ +) +event_class_operator_w_body(TargetSubmit, \ + bool isSameReqNumTeams = \ + (Expected.RequestedNumTeams == Observed.RequestedNumTeams); \ + return isSameReqNumTeams; \ +) +event_class_operator_w_body(TargetSubmitEmi, \ + bool isSameReqNumTeams = \ + (Expected.RequestedNumTeams == Observed.RequestedNumTeams); \ + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); \ + return isSameReqNumTeams && isSameEndpoint; \ +) +event_class_operator_stub(ControlTool) +event_class_operator_w_body(DeviceInitialize, \ + bool isSameDeviceNum = (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameType = (Expected.Type == expectedDefault(const char *)) ? \ + true : \ + ((Expected.Type == Observed.Type) || \ + (strcmp(Expected.Type, Observed.Type) == 0)); \ + bool isSameDevice = \ + (Expected.Device == expectedDefault(ompt_device_t *)) ? \ + true : (Expected.Device == Observed.Device); \ + return isSameDeviceNum && isSameType && isSameDevice; \ +) +event_class_operator_w_body(DeviceFinalize, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + return isSameDeviceNum; +) +event_class_operator_w_body(DeviceLoad, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + return isSameDeviceNum && isSameSize; \ +) +event_class_operator_stub(DeviceUnload) +event_class_operator_w_body(BufferRequest, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t *)) ? 
\ + true : (Expected.Bytes == Observed.Bytes); \ + return isSameDeviceNum && isSameSize; \ +) +event_class_operator_w_body(BufferComplete, \ + bool isSameDeviceNum = (Expected.DeviceNum == expectedDefault(int)) ? \ + true : (Expected.DeviceNum == Observed.DeviceNum); \ + bool isSameSize = (Expected.Bytes == expectedDefault(size_t)) ? \ + true : (Expected.Bytes == Observed.Bytes); \ + return isSameDeviceNum && isSameSize; \ +) +event_class_operator_w_body(BufferRecord, \ + bool isSameType = (Expected.Record.type == Observed.Record.type); \ + bool isSameTargetId = \ + (Expected.Record.target_id == expectedDefault(ompt_id_t)) \ + ? true \ + : (Expected.Record.target_id == Observed.Record.target_id); \ + if (!(isSameType && isSameTargetId)) return false; \ + bool isEqual = true; \ + ompt_device_time_t ObservedDurationNs = \ + Observed.Record.record.target_data_op.end_time - Observed.Record.time; \ + switch(Expected.Record.type) { \ + case ompt_callback_target: \ + isEqual &= \ + (Expected.Record.record.target.kind == expectedDefault(ompt_target_t)) \ + ? true \ + : (Expected.Record.record.target.kind == \ + Observed.Record.record.target.kind); \ + isEqual &= \ + (Expected.Record.record.target.endpoint == \ + expectedDefault(ompt_scope_endpoint_t)) \ + ? true \ + : (Expected.Record.record.target.endpoint == \ + Observed.Record.record.target.endpoint); \ + isEqual &= \ + (Expected.Record.record.target.device_num == expectedDefault(int)) \ + ? true \ + : (Expected.Record.record.target.device_num == \ + Observed.Record.record.target.device_num); \ + break; \ + case ompt_callback_target_data_op: \ + isEqual &= \ + (Expected.Record.record.target_data_op.optype == \ + expectedDefault(ompt_target_data_op_t)) \ + ? true \ + : (Expected.Record.record.target_data_op.optype == \ + Observed.Record.record.target_data_op.optype); \ + isEqual &= \ + (Expected.Record.record.target_data_op.bytes == expectedDefault(size_t)) \ + ? 
true \ + : (Expected.Record.record.target_data_op.bytes == \ + Observed.Record.record.target_data_op.bytes); \ + isEqual &= \ + (Expected.Record.record.target_data_op.src_addr == \ + expectedDefault(void *)) \ + ? true \ + : (Expected.Record.record.target_data_op.src_addr == \ + Observed.Record.record.target_data_op.src_addr); \ + isEqual &= \ + (Expected.Record.record.target_data_op.dest_addr == \ + expectedDefault(void *)) \ + ? true \ + : (Expected.Record.record.target_data_op.dest_addr == \ + Observed.Record.record.target_data_op.dest_addr); \ + isEqual &= \ + (Expected.Record.record.target_data_op.src_device_num == \ + expectedDefault(int)) \ + ? true \ + : (Expected.Record.record.target_data_op.src_device_num == \ + Observed.Record.record.target_data_op.src_device_num); \ + isEqual &= \ + (Expected.Record.record.target_data_op.dest_device_num == \ + expectedDefault(int)) \ + ? true \ + : (Expected.Record.record.target_data_op.dest_device_num == \ + Observed.Record.record.target_data_op.dest_device_num); \ + isEqual &= \ + (Expected.Record.record.target_data_op.host_op_id == \ + expectedDefault(ompt_id_t)) \ + ? true \ + : (Expected.Record.record.target_data_op.host_op_id == \ + Observed.Record.record.target_data_op.host_op_id); \ + isEqual &= \ + (Expected.Record.record.target_data_op.codeptr_ra == \ + expectedDefault(void *)) \ + ? 
true \ + : (Expected.Record.record.target_data_op.codeptr_ra == \ + Observed.Record.record.target_data_op.codeptr_ra); \ + if (Expected.Record.record.target_data_op.end_time != \ + expectedDefault(ompt_device_time_t)) { \ + isEqual &= \ + ObservedDurationNs <= Expected.Record.record.target_data_op.end_time; \ + } \ + isEqual &= ObservedDurationNs >= Expected.Record.time; \ + break; \ + case ompt_callback_target_submit: \ + ObservedDurationNs = \ + Observed.Record.record.target_kernel.end_time - Observed.Record.time; \ + isEqual &= \ + (Expected.Record.record.target_kernel.requested_num_teams == \ + expectedDefault(unsigned int)) \ + ? true \ + : (Expected.Record.record.target_kernel.requested_num_teams == \ + Observed.Record.record.target_kernel.requested_num_teams); \ + isEqual &= \ + (Expected.Record.record.target_kernel.granted_num_teams == \ + expectedDefault(unsigned int)) \ + ? true \ + : (Expected.Record.record.target_kernel.granted_num_teams == \ + Observed.Record.record.target_kernel.granted_num_teams); \ + isEqual &= \ + (Expected.Record.record.target_kernel.host_op_id == \ + expectedDefault(ompt_id_t)) \ + ? 
true \ + : (Expected.Record.record.target_kernel.host_op_id == \ + Observed.Record.record.target_kernel.host_op_id); \ + if (Expected.Record.record.target_kernel.end_time != \ + expectedDefault(ompt_device_time_t)) { \ + isEqual &= \ + ObservedDurationNs <= Expected.Record.record.target_kernel.end_time; \ + } \ + isEqual &= ObservedDurationNs >= Expected.Record.time; \ + break; \ + default: \ + assert(false && "Encountered invalid record type"); \ + } \ + return isEqual; \ +) +event_class_operator_stub(BufferRecordDeallocation) + +define_cast_func(AssertionSyncPoint) +define_cast_func(AssertionSuspend) +define_cast_func(ThreadBegin) +define_cast_func(ThreadEnd) +define_cast_func(ParallelBegin) +define_cast_func(ParallelEnd) +define_cast_func(Work) +define_cast_func(Dispatch) +define_cast_func(TaskCreate) +define_cast_func(Dependences) +define_cast_func(TaskDependence) +define_cast_func(TaskSchedule) +define_cast_func(ImplicitTask) +define_cast_func(Masked) +define_cast_func(SyncRegion) +define_cast_func(MutexAcquire) +define_cast_func(Mutex) +define_cast_func(NestLock) +define_cast_func(Flush) +define_cast_func(Cancel) +define_cast_func(Target) +define_cast_func(TargetEmi) +define_cast_func(TargetDataOp) +define_cast_func(TargetDataOpEmi) +define_cast_func(TargetSubmit) +define_cast_func(TargetSubmitEmi) +define_cast_func(ControlTool) +define_cast_func(DeviceInitialize) +define_cast_func(DeviceFinalize) +define_cast_func(DeviceLoad) +define_cast_func(DeviceUnload) +define_cast_func(BufferRequest) +define_cast_func(BufferComplete) +define_cast_func(BufferRecord) +define_cast_func(BufferRecordDeallocation) + +class_equals_op(AssertionSyncPoint) +class_equals_op(AssertionSuspend) +class_equals_op(ThreadBegin) +class_equals_op(ThreadEnd) +class_equals_op(ParallelBegin) +class_equals_op(ParallelEnd) +class_equals_op(Work) +class_equals_op(Dispatch) +class_equals_op(TaskCreate) +class_equals_op(Dependences) +class_equals_op(TaskDependence) +class_equals_op(TaskSchedule) 
+class_equals_op(ImplicitTask) +class_equals_op(Masked) +class_equals_op(SyncRegion) +class_equals_op(MutexAcquire) +class_equals_op(Mutex) +class_equals_op(NestLock) +class_equals_op(Flush) +class_equals_op(Cancel) +class_equals_op(Target) +class_equals_op(TargetEmi) +class_equals_op(TargetDataOp) +class_equals_op(TargetDataOpEmi) +class_equals_op(TargetSubmit) +class_equals_op(TargetSubmitEmi) +class_equals_op(ControlTool) +class_equals_op(DeviceInitialize) +class_equals_op(DeviceFinalize) +class_equals_op(DeviceLoad) +class_equals_op(DeviceUnload) +class_equals_op(BufferRequest) +class_equals_op(BufferComplete) +class_equals_op(BufferRecord) +class_equals_op(BufferRecordDeallocation) +// clang-format on + +} // namespace internal + +} // namespace omptest diff --git a/openmp/tools/omptest/src/Logging.cpp b/openmp/tools/omptest/src/Logging.cpp new file mode 100644 index 0000000000000..28329c74d188d --- /dev/null +++ b/openmp/tools/omptest/src/Logging.cpp @@ -0,0 +1,177 @@ +//===- Logging.cpp - General logging class implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements ompTest-tailored logging. 
+/// +//===----------------------------------------------------------------------===// + +#include "Logging.h" + +using namespace omptest; +using namespace logging; + +Logger::Logger(Level LogLevel, std::ostream &OutStream, bool FormatOutput) + : LoggingLevel(LogLevel), OutStream(OutStream), FormatOutput(FormatOutput) { + // Flush any buffered output + OutStream << std::flush; +} + +Logger::~Logger() { + // Flush any buffered output + OutStream << std::flush; +} + +std::map<Level, std::set<FormatOption>> AggregatedFormatOptions{ + {Level::DIAGNOSTIC, {FormatOption::COLOR_LightBlue}}, + {Level::INFO, {FormatOption::COLOR_LightGray}}, + {Level::WARNING, {FormatOption::COLOR_LightYellow}}, + {Level::ERROR, {FormatOption::COLOR_Red}}, + {Level::CRITICAL, {FormatOption::COLOR_LightRed}}, + {Level::Default, {FormatOption::NONE}}, + {Level::ExpectedEvent, {FormatOption::BOLD, FormatOption::COLOR_Cyan}}, + {Level::ObservedEvent, {FormatOption::COLOR_Cyan}}, + {Level::OffendingEvent, {FormatOption::COLOR_Yellow}}}; + +const char *logging::to_string(Level LogLevel) { + switch (LogLevel) { + case Level::DIAGNOSTIC: + return "DIAGNOSTIC"; + case Level::INFO: + return "INFO"; + case Level::WARNING: + return "WARNING"; + case Level::ERROR: + return "ERROR"; + case Level::CRITICAL: + return "CRITICAL"; + default: + assert(false && "Requested string representation for unknown LogLevel"); + return "UNKNOWN"; + } +} + +std::string logging::getFormatSequence(Level LogLevel) { + auto Options = AggregatedFormatOptions[LogLevel]; + std::stringstream SS{"\033["}; + SS << "\033["; + if (!Options.empty()) { + for (auto &Option : AggregatedFormatOptions[LogLevel]) + SS << int(Option) << ';'; + SS.seekp(-1, SS.cur); + SS << 'm'; + } else { + // Fallback to NONE / reset formatting + SS << "0m"; + } + return SS.str(); +} + +std::string logging::format(const std::string &Message, FormatOption Option) { + std::stringstream SS{"\033["}; + SS << "\033["; + SS << int(Option) << 'm' << Message << "\033[0m"; + return SS.str(); +}
+ +std::string logging::format(const std::string &Message, + std::set<FormatOption> Options) { + std::stringstream SS{"\033["}; + SS << "\033["; + for (auto &Option : Options) + SS << int(Option) << ';'; + SS.seekp(-1, SS.cur); + SS << 'm' << Message << "\033[0m"; + return SS.str(); +} + +void Logger::log(Level LogLevel, const std::string &Message) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << Message << getFormatSequence() << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message << std::endl; + } +} + +void Logger::eventMismatch(const omptest::OmptAssertEvent &OffendingEvent, + const std::string &Message, Level LogLevel) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << getFormatSequence() + << format(Message, AggregatedFormatOptions[LogLevel]) + << "\n\tOffending event name='" + << format(OffendingEvent.getEventName(), + AggregatedFormatOptions[Level::OffendingEvent]) + << "'\n\tOffending='" + << format(OffendingEvent.toString(), + AggregatedFormatOptions[Level::OffendingEvent]) + << '\'' << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message + << "\n\tOffending event name='" << OffendingEvent.getEventName() + << "'\n\tOffending='" << OffendingEvent.toString() << '\'' + << std::endl; + } +} + +void Logger::eventMismatch(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent, + const std::string &Message, Level LogLevel) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << Message <<
getFormatSequence() + << "\n\tExpected event name='" + << format(ExpectedEvent.getEventName(), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "' observe='" + << format(to_string(ExpectedEvent.getEventExpectedState()), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "'\n\tObserved event name='" + << format(ObservedEvent.getEventName(), + AggregatedFormatOptions[Level::ObservedEvent]) + << "'\n\tExpected='" + << format(ExpectedEvent.toString(), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "'\n\tObserved='" + << format(ObservedEvent.toString(), + AggregatedFormatOptions[Level::ObservedEvent]) + << '\'' << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message + << "\n\tExpected event name='" << ExpectedEvent.getEventName() + << "' observe='" + << to_string(ExpectedEvent.getEventExpectedState()) + << "'\n\tObserved event name='" << ObservedEvent.getEventName() + << "'\n\tExpected='" << ExpectedEvent.toString() + << "'\n\tObserved='" << ObservedEvent.toString() << '\'' + << std::endl; + } +} + +void Logger::setFormatOutput(bool Enabled) { FormatOutput = Enabled; } + +Level Logger::getLoggingLevel() const { return LoggingLevel; } + +void Logger::setLoggingLevel(Level LogLevel) { LoggingLevel = LogLevel; } diff --git a/openmp/tools/omptest/src/OmptAssertEvent.cpp b/openmp/tools/omptest/src/OmptAssertEvent.cpp new file mode 100644 index 0000000000000..b03f267a8c397 --- /dev/null +++ b/openmp/tools/omptest/src/OmptAssertEvent.cpp @@ -0,0 +1,587 @@ +//===- OmptAssertEvent.cpp - Assertion event implementations ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements assertion event CTORs, for generally all observable events. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptAssertEvent.h" +#include + +using namespace omptest; + +const char *omptest::to_string(ObserveState State) { + switch (State) { + case ObserveState::generated: + return "generated"; + case ObserveState::always: + return "always"; + case ObserveState::never: + return "never"; + default: + assert(false && "Requested string representation for unknown ObserveState"); + return "UNKNOWN"; + } +} + +OmptAssertEvent::OmptAssertEvent(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + internal::InternalEvent *IE) + : Name(Name), Group(Group), ExpectedState(Expected), TheEvent(IE) {} + +OmptAssertEvent OmptAssertEvent::AssertionSyncPoint( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, const std::string &SyncPointName) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::AssertionSyncPoint(SyncPointName)); +} + +OmptAssertEvent +OmptAssertEvent::AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::AssertionSuspend()); +} + +OmptAssertEvent OmptAssertEvent::ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ThreadBegin(ThreadType)); +} + +OmptAssertEvent OmptAssertEvent::ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::ThreadEnd()); +} + +OmptAssertEvent 
OmptAssertEvent::ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ParallelBegin(NumThreads)); +} + +OmptAssertEvent OmptAssertEvent::ParallelEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ParallelEnd(ParallelData, + EncounteringTaskData, Flags, + CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::Work(WorkType, Endpoint, ParallelData, + TaskData, Count, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::Dispatch(ParallelData, TaskData, Kind, Instance)); +} + +OmptAssertEvent OmptAssertEvent::TaskCreate( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *NewTaskData, + int Flags, int HasDependences, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent( + EName, EGroup, Expected, + new internal::TaskCreate(EncounteringTaskData, EncounteringTaskFrame, + NewTaskData, Flags, HasDependences, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::TaskSchedule()); +} + +OmptAssertEvent OmptAssertEvent::ImplicitTask( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + unsigned int ActualParallelism, unsigned int Index, int Flags) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ImplicitTask(Endpoint, ParallelData, + TaskData, ActualParallelism, + Index, Flags)); +} + +OmptAssertEvent OmptAssertEvent::SyncRegion( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::SyncRegion(Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_id_t TargetId, + const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::Target(Kind, Endpoint, DeviceNum, + TaskData, TargetId, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::TargetEmi(const std::string &Name, const std::string &Group, + const 
ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetEmi(Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, + TargetData, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOp( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetDataOp( + TargetId, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOp( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, size_t Bytes, + void *SrcAddr, void *DstAddr, int SrcDeviceNum, int DstDeviceNum, + ompt_id_t TargetId, ompt_id_t HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetDataOp( + TargetId, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOpEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetDataOpEmi(Endpoint, TargetTaskData, TargetData, + HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOpEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + ompt_scope_endpoint_t Endpoint, size_t Bytes, void *SrcAddr, void *DstAddr, + int SrcDeviceNum, int DstDeviceNum, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetDataOpEmi(Endpoint, TargetTaskData, TargetData, + HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmit(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_id_t TargetId, + ompt_id_t HostOpId, + unsigned int RequestedNumTeams) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetSubmit(TargetId, HostOpId, RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmit(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + unsigned int RequestedNumTeams, + ompt_id_t TargetId, + ompt_id_t HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetSubmit(TargetId, HostOpId, RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmitEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetSubmitEmi(Endpoint, TargetData, + HostOpId, + RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmitEmi(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + unsigned int RequestedNumTeams, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, + ompt_id_t *HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetSubmitEmi(Endpoint, TargetData, + HostOpId, + RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::ControlTool(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::ControlTool()); +} + +OmptAssertEvent OmptAssertEvent::DeviceInitialize( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, const char *Type, + ompt_device_t *Device, ompt_function_lookup_t LookupFn, + const char *DocumentationStr) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::DeviceInitialize(DeviceNum, Type, Device, + LookupFn, + DocumentationStr)); +} + +OmptAssertEvent OmptAssertEvent::DeviceFinalize(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::DeviceFinalize(DeviceNum)); +} + +OmptAssertEvent +OmptAssertEvent::DeviceLoad(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Filename, int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, void *HostAddr, + void *DeviceAddr, uint64_t ModuleId) { + auto EName = getName(Name); + auto EGroup = 
getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::DeviceLoad(DeviceNum, Filename, OffsetInFile, VmaInFile, + Bytes, HostAddr, DeviceAddr, ModuleId)); +} + +OmptAssertEvent OmptAssertEvent::DeviceUnload(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::DeviceUnload()); +} + +OmptAssertEvent OmptAssertEvent::BufferRequest(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum, + ompt_buffer_t **Buffer, + size_t *Bytes) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRequest(DeviceNum, Buffer, Bytes)); +} + +OmptAssertEvent OmptAssertEvent::BufferComplete( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, ompt_buffer_t *Buffer, + size_t Bytes, ompt_buffer_cursor_t Begin, int BufferOwned) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferComplete(DeviceNum, Buffer, Bytes, + Begin, BufferOwned)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_record_ompt_t *Record) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(Record)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, ompt_id_t TaskId, + ompt_id_t TargetId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + if (Type != ompt_callback_target) + 
assert(false && "CTOR only suited for type: 'ompt_callback_target'"); + + ompt_record_target_t Subrecord{Kind, Endpoint, DeviceNum, + TaskId, TargetId, CodeptrRA}; + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + RecordPtr->time = expectedDefault(ompt_device_time_t); + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + std::pair Timeframe, void *SrcAddr, + void *DstAddr, int SrcDeviceNum, int DstDeviceNum, ompt_id_t TargetId, + ompt_id_t HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + if (Type != ompt_callback_target_data_op) + assert(false && + "CTOR only suited for type: 'ompt_callback_target_data_op'"); + + ompt_record_target_data_op_t Subrecord{ + HostOpId, OpType, SrcAddr, SrcDeviceNum, DstAddr, + DstDeviceNum, Bytes, Timeframe.second, CodeptrRA}; + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + RecordPtr->time = Timeframe.first; + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target_data_op = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + ompt_device_time_t 
MinimumTimeDelta, void *SrcAddr, void *DstAddr, + int SrcDeviceNum, int DstDeviceNum, ompt_id_t TargetId, ompt_id_t HostOpId, + const void *CodeptrRA) { + return BufferRecord(Name, Group, Expected, Type, OpType, Bytes, + {MinimumTimeDelta, expectedDefault(ompt_device_time_t)}, + SrcAddr, DstAddr, SrcDeviceNum, DstDeviceNum, TargetId, + HostOpId, CodeptrRA); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + std::pair Timeframe, + unsigned int RequestedNumTeams, unsigned int GrantedNumTeams, + ompt_id_t TargetId, ompt_id_t HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + bool isDefault = (Timeframe.first == expectedDefault(ompt_device_time_t)); + isDefault &= (Timeframe.second == expectedDefault(ompt_device_time_t)); + isDefault &= (RequestedNumTeams == expectedDefault(unsigned int)); + isDefault &= (GrantedNumTeams == expectedDefault(unsigned int)); + isDefault &= (TargetId == expectedDefault(ompt_id_t)); + isDefault &= (HostOpId == expectedDefault(ompt_id_t)); + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + + // This handles the simplest occurrence of a device tracing record + // We can only check for Type -- since all other properties are set to default + if (isDefault) { + RecordPtr->time = expectedDefault(ompt_device_time_t); + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = expectedDefault(ompt_id_t); + if (Type == ompt_callback_target) { + ompt_record_target_t Subrecord{expectedDefault(ompt_target_t), + expectedDefault(ompt_scope_endpoint_t), + expectedDefault(int), + expectedDefault(ompt_id_t), + expectedDefault(ompt_id_t), + expectedDefault(void *)}; + RecordPtr->record.target = Subrecord; + } + + if (Type == ompt_callback_target_data_op) { + 
ompt_record_target_data_op_t Subrecord{ + expectedDefault(ompt_id_t), expectedDefault(ompt_target_data_op_t), + expectedDefault(void *), expectedDefault(int), + expectedDefault(void *), expectedDefault(int), + expectedDefault(size_t), expectedDefault(ompt_device_time_t), + expectedDefault(void *)}; + RecordPtr->record.target_data_op = Subrecord; + } + + if (Type == ompt_callback_target_submit) { + ompt_record_target_kernel_t Subrecord{ + expectedDefault(ompt_id_t), expectedDefault(unsigned int), + expectedDefault(unsigned int), expectedDefault(ompt_device_time_t)}; + RecordPtr->record.target_kernel = Subrecord; + } + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); + } + + if (Type != ompt_callback_target_submit) + assert(false && "CTOR only suited for type: 'ompt_callback_target_submit'"); + + ompt_record_target_kernel_t Subrecord{HostOpId, RequestedNumTeams, + GrantedNumTeams, Timeframe.second}; + + RecordPtr->time = Timeframe.first; + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target_kernel = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_device_time_t MinimumTimeDelta, unsigned int RequestedNumTeams, + unsigned int GrantedNumTeams, ompt_id_t TargetId, ompt_id_t HostOpId) { + return BufferRecord(Name, Group, Expected, Type, + {MinimumTimeDelta, expectedDefault(ompt_device_time_t)}, + RequestedNumTeams, GrantedNumTeams, TargetId, HostOpId); +} + +OmptAssertEvent OmptAssertEvent::BufferRecordDeallocation( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_buffer_t *Buffer) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new 
internal::BufferRecordDeallocation(Buffer)); +} + +std::string OmptAssertEvent::getEventName() const { return Name; } + +std::string OmptAssertEvent::getEventGroup() const { return Group; } + +ObserveState OmptAssertEvent::getEventExpectedState() const { + return ExpectedState; +} + +internal::EventTy OmptAssertEvent::getEventType() const { + return TheEvent->getType(); +} + +internal::InternalEvent *OmptAssertEvent::getEvent() const { + return TheEvent.get(); +} + +std::string OmptAssertEvent::toString(bool PrefixEventName) const { + std::string S; + if (PrefixEventName) + S.append(getEventName()).append(": "); + S.append((TheEvent == nullptr) ? "OmptAssertEvent" : TheEvent->toString()); + return S; +} + +bool omptest::operator==(const OmptAssertEvent &A, const OmptAssertEvent &B) { + assert(A.TheEvent.get() != nullptr && "A is valid"); + assert(B.TheEvent.get() != nullptr && "B is valid"); + + return A.TheEvent->getType() == B.TheEvent->getType() && + A.TheEvent->equals(B.TheEvent.get()); +} diff --git a/openmp/tools/omptest/src/OmptAsserter.cpp b/openmp/tools/omptest/src/OmptAsserter.cpp new file mode 100644 index 0000000000000..1c2f2dee69e16 --- /dev/null +++ b/openmp/tools/omptest/src/OmptAsserter.cpp @@ -0,0 +1,480 @@ +//===- OmptAsserter.cpp - Asserter-related implementations ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements all asserter-related class methods, like: notifications, handling +/// of groups or determination of the testcase state. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptAsserter.h" +#include "Logging.h" + +#include <algorithm> + +using namespace omptest; +using namespace internal; + +// Initialize static members +std::mutex OmptAsserter::StaticMemberAccessMutex; +std::weak_ptr<OmptEventGroupInterface> + OmptAsserter::EventGroupInterfaceInstance; +std::weak_ptr<logging::Logger> OmptAsserter::LoggingInstance; + +/// Acquires the shared event-group interface and logger. Both are tracked +/// through static weak_ptrs, so an existing instance is reused while any +/// asserter still holds it and a fresh one is created otherwise. +OmptAsserter::OmptAsserter() { + // Protect static members access + std::lock_guard<std::mutex> Lock(StaticMemberAccessMutex); + + // Upgrade OmptEventGroupInterface weak_ptr to shared_ptr + { + EventGroups = EventGroupInterfaceInstance.lock(); + if (!EventGroups) { + // Coordinator doesn't exist or was previously destroyed, create a new + // one. + EventGroups = std::make_shared<OmptEventGroupInterface>(); + // Store a weak reference to it + EventGroupInterfaceInstance = EventGroups; + } + // EventGroups is now a valid shared_ptr, either to a new or existing + // instance. + } + + // Upgrade logging::Logger weak_ptr to shared_ptr + { + Log = LoggingInstance.lock(); + if (!Log) { + // Logger doesn't exist or was previously destroyed, create a new + // one. + Log = std::make_shared<logging::Logger>(); + // Store a weak reference to it + LoggingInstance = Log; + } + // Log is now a valid shared_ptr, either to a new or existing instance.
+ } +} + +void OmptListener::setActive(bool Enabled) { Active = Enabled; } + +bool OmptListener::isActive() { return Active; } + +bool OmptListener::isSuppressedEventType(EventTy EvTy) { + return SuppressedEvents.find(EvTy) != SuppressedEvents.end(); +} + +void OmptListener::permitEvent(EventTy EvTy) { SuppressedEvents.erase(EvTy); } + +void OmptListener::suppressEvent(EventTy EvTy) { + SuppressedEvents.insert(EvTy); +} + +void OmptAsserter::insert(OmptAssertEvent &&AE) { + assert(false && "Base class 'insert' has undefined semantics."); +} + +void OmptAsserter::notify(OmptAssertEvent &&AE) { + // Ignore notifications while inactive + if (!isActive() || isSuppressedEventType(AE.getEventType())) + return; + + this->notifyImpl(std::move(AE)); +} + +AssertState OmptAsserter::checkState() { return State; } + +bool OmptAsserter::verifyEventGroups(const OmptAssertEvent &ExpectedEvent, + const OmptAssertEvent &ObservedEvent) { + assert(ExpectedEvent.getEventType() == ObservedEvent.getEventType() && + "Type mismatch: Expected != Observed event type"); + assert(EventGroups && "Missing EventGroups interface"); + + // Ignore all events within "default" group + auto GroupName = ExpectedEvent.getEventGroup(); + + if (GroupName == "default") + return true; + + // Get a pointer to the observed internal event + auto Event = ObservedEvent.getEvent(); + + switch (Event->getType()) { + case EventTy::Target: + if (auto E = static_cast(Event)) { + if (E->Endpoint == ompt_scope_begin) { + // Add new group since we entered a Target Region + EventGroups->addActiveEventGroup(GroupName, + AssertEventGroup{E->TargetId}); + } else if (E->Endpoint == ompt_scope_end) { + // Deprecate group since we return from a Target Region + EventGroups->deprecateActiveEventGroup(GroupName); + } + return true; + } + return false; + case EventTy::TargetEmi: + if (auto E = static_cast(Event)) { + if (E->Endpoint == ompt_scope_begin) { + // Add new group since we entered a Target Region + 
EventGroups->addActiveEventGroup( + GroupName, AssertEventGroup{E->TargetData->value}); + } else if (E->Endpoint == ompt_scope_end) { + // Deprecate group since we return from a Target Region + EventGroups->deprecateActiveEventGroup(GroupName); + } + return true; + } + return false; + case EventTy::TargetDataOp: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups(GroupName, + AssertEventGroup{E->TargetId}); + + return false; + case EventTy::TargetDataOpEmi: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups( + GroupName, AssertEventGroup{E->TargetData->value}); + + return false; + case EventTy::TargetSubmit: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups(GroupName, + AssertEventGroup{E->TargetId}); + + return false; + case EventTy::TargetSubmitEmi: + if (auto E = static_cast(Event)) + return EventGroups->checkActiveEventGroups( + GroupName, AssertEventGroup{E->TargetData->value}); + + return false; + case EventTy::BufferRecord: + // BufferRecords are delivered asynchronously: also check deprecated groups. 
+ if (auto E = static_cast(Event)) + return (EventGroups->checkActiveEventGroups( + GroupName, AssertEventGroup{E->Record.target_id}) || + EventGroups->checkDeprecatedEventGroups( + GroupName, AssertEventGroup{E->Record.target_id})); + return false; + // Some event types do not need any handling + case EventTy::ThreadBegin: + case EventTy::ThreadEnd: + case EventTy::ParallelBegin: + case EventTy::ParallelEnd: + case EventTy::Work: + case EventTy::Dispatch: + case EventTy::TaskCreate: + case EventTy::Dependences: + case EventTy::TaskDependence: + case EventTy::TaskSchedule: + case EventTy::ImplicitTask: + case EventTy::Masked: + case EventTy::SyncRegion: + case EventTy::MutexAcquire: + case EventTy::Mutex: + case EventTy::NestLock: + case EventTy::Flush: + case EventTy::Cancel: + case EventTy::DeviceInitialize: + case EventTy::DeviceFinalize: + case EventTy::DeviceLoad: + case EventTy::DeviceUnload: + case EventTy::BufferRequest: + case EventTy::BufferComplete: + case EventTy::BufferRecordDeallocation: + return true; + // Some event types must not be encountered + case EventTy::None: + case EventTy::AssertionSyncPoint: + case EventTy::AssertionSuspend: + default: + assert(false && "Encountered invalid event type"); + } + + return true; +} + +void OmptAsserter::setOperationMode(AssertMode Mode) { OperationMode = Mode; } + +void OmptSequencedAsserter::insert(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + Events.emplace_back(std::move(AE)); +} + +void OmptSequencedAsserter::notifyImpl(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + // Ignore notifications while inactive, or for suppressed events + if (Events.empty() || !isActive() || isSuppressedEventType(AE.getEventType())) + return; + + ++NumNotifications; + + // Note: Order of these checks has semantic meaning. + // (1) Synchronization points should fail if there are remaining events, + // otherwise pass. (2) Regular notification while no further events are + // expected: fail. 
(3) Assertion suspension relies on a next expected event + // being available. (4) All other cases are considered 'regular' and match the + // next expected against the observed event. (5+6) Depending on the state / + // mode we signal failure if no other check has done already, or signaled pass + // by early-exit. + if (consumeSyncPoint(AE) || // Handle observed SyncPoint event + checkExcessNotify(AE) || // Check for remaining expected + consumeSuspend() || // Handle requested suspend + consumeRegularEvent(AE) || // Handle regular event + AssertionSuspended || // Ignore fail, if suspended + OperationMode == AssertMode::relaxed) // Ignore fail, if relaxed op-mode + return; + + Log->eventMismatch(Events[NextEvent], AE, + "[OmptSequencedAsserter] The events are not equal"); + State = AssertState::fail; +} + +bool OmptSequencedAsserter::consumeSyncPoint( + const omptest::OmptAssertEvent &AE) { + if (AE.getEventType() == EventTy::AssertionSyncPoint) { + auto NumRemainingEvents = getRemainingEventCount(); + // Upon encountering a SyncPoint, all events should have been processed + if (NumRemainingEvents == 0) + return true; + + Log->eventMismatch( + AE, + "[OmptSequencedAsserter] Encountered SyncPoint while still awaiting " + + std::to_string(NumRemainingEvents) + " events. Asserted " + + std::to_string(NumSuccessfulAsserts) + "/" + + std::to_string(Events.size()) + " events successfully."); + State = AssertState::fail; + return true; + } + + // Nothing to process: continue. + return false; +} + +bool OmptSequencedAsserter::checkExcessNotify( + const omptest::OmptAssertEvent &AE) { + if (NextEvent >= Events.size()) { + // If we are not expecting any more events and passively asserting: return + if (AssertionSuspended) + return true; + + Log->eventMismatch( + AE, "[OmptSequencedAsserter] Too many events to check (" + + std::to_string(NumNotifications) + "). 
Asserted " + + std::to_string(NumSuccessfulAsserts) + "/" + + std::to_string(Events.size()) + " events successfully."); + State = AssertState::fail; + return true; + } + + // Remaining expected events present: continue. + return false; +} + +bool OmptSequencedAsserter::consumeSuspend() { + // On AssertionSuspend -- enter 'passive' assertion. + // Since we may encounter multiple, successive AssertionSuspend events, loop + // until we hit the next non-AssertionSuspend event. + while (Events[NextEvent].getEventType() == EventTy::AssertionSuspend) { + AssertionSuspended = true; + // We just hit the very last event: indicate early exit. + if (++NextEvent >= Events.size()) + return true; + } + + // Continue with remaining notification logic. + return false; +} + +bool OmptSequencedAsserter::consumeRegularEvent( + const omptest::OmptAssertEvent &AE) { + // If we are actively asserting, increment the event counter. + // Otherwise: If passively asserting, we will keep waiting for a match. + auto &E = Events[NextEvent]; + if (E == AE && verifyEventGroups(E, AE)) { + if (E.getEventExpectedState() == ObserveState::always) { + ++NumSuccessfulAsserts; + } else if (E.getEventExpectedState() == ObserveState::never) { + Log->eventMismatch(E, AE, + "[OmptSequencedAsserter] Encountered forbidden event"); + State = AssertState::fail; + } + + // Return to active assertion + if (AssertionSuspended) + AssertionSuspended = false; + + // Match found, increment index and indicate early exit (success). + ++NextEvent; + return true; + } + + // Continue with remaining notification logic. + return false; +} + +size_t OmptSequencedAsserter::getRemainingEventCount() { + return std::count_if(Events.begin(), Events.end(), + [](const omptest::OmptAssertEvent &E) { + return E.getEventExpectedState() == + ObserveState::always; + }) - + NumSuccessfulAsserts; +} + +AssertState OmptSequencedAsserter::checkState() { + // This is called after the testcase executed. 
+ // Once reached the number of successful notifications should be equal to the + // number of expected events. However, there may still be excluded as well as + // special asserter events remaining in the sequence. + for (size_t i = NextEvent; i < Events.size(); ++i) { + auto &E = Events[i]; + if (E.getEventExpectedState() == ObserveState::always) { + State = AssertState::fail; + Log->eventMismatch(E, "[OmptSequencedAsserter] Expected event was not " + "encountered (Remaining events: " + + std::to_string(getRemainingEventCount()) + ")"); + break; + } + } + + return State; +} + +void OmptEventAsserter::insert(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + Events.emplace_back(std::move(AE)); +} + +void OmptEventAsserter::notifyImpl(OmptAssertEvent &&AE) { + std::lock_guard Lock(AssertMutex); + if (Events.empty() || !isActive() || isSuppressedEventType(AE.getEventType())) + return; + + if (NumEvents == 0) + NumEvents = Events.size(); + + ++NumNotifications; + + if (AE.getEventType() == EventTy::AssertionSyncPoint) { + auto NumRemainingEvents = getRemainingEventCount(); + // Upon encountering a SyncPoint, all events should have been processed + if (NumRemainingEvents == 0) + return; + + Log->eventMismatch( + AE, "[OmptEventAsserter] Encountered SyncPoint while still awaiting " + + std::to_string(NumRemainingEvents) + " events. 
Asserted " + + std::to_string(NumSuccessfulAsserts) + " events successfully."); + State = AssertState::fail; + return; + } + + for (size_t i = 0; i < Events.size(); ++i) { + auto &E = Events[i]; + if (E == AE && verifyEventGroups(E, AE)) { + if (E.getEventExpectedState() == ObserveState::always) { + Events.erase(Events.begin() + i); + ++NumSuccessfulAsserts; + } else if (E.getEventExpectedState() == ObserveState::never) { + Log->eventMismatch(E, AE, + "[OmptEventAsserter] Encountered forbidden event"); + State = AssertState::fail; + } + return; + } + } + + if (OperationMode == AssertMode::strict) { + Log->eventMismatch(AE, "[OmptEventAsserter] Too many events to check (" + + std::to_string(NumNotifications) + + "). Asserted " + + std::to_string(NumSuccessfulAsserts) + + " events successfully. (Remaining events: " + + std::to_string(getRemainingEventCount()) + ")"); + State = AssertState::fail; + return; + } +} + +size_t OmptEventAsserter::getRemainingEventCount() { + // size_t EventCount = std::count_if(Events.begin(), Events.end(), [](const + // omptest::OmptAssertEvent &E) { return E.getEventExpectedState() == + // ObserveState::always; }); + return std::count_if( + Events.begin(), Events.end(), [](const omptest::OmptAssertEvent &E) { + return E.getEventExpectedState() == ObserveState::always; + }); +} + +AssertState OmptEventAsserter::checkState() { + // This is called after the testcase executed. 
+ // Once reached no more expected events should be in the queue + for (const auto &E : Events) { + // Check if any of the remaining events were expected to be observed + if (E.getEventExpectedState() == ObserveState::always) { + State = AssertState::fail; + Log->eventMismatch(E, "[OmptEventAsserter] Expected event was not " + "encountered (Remaining events: " + + std::to_string(getRemainingEventCount()) + ")"); + break; + } + } + + return State; +} + +void OmptEventReporter::notify(OmptAssertEvent &&AE) { + if (!isActive() || isSuppressedEventType(AE.getEventType())) + return; + + // Prepare notification, containing the newline to avoid stream interleaving. + auto Notification{AE.toString()}; + Notification.push_back('\n'); + OutStream << Notification; +} + +bool OmptEventGroupInterface::addActiveEventGroup( + const std::string &GroupName, omptest::AssertEventGroup Group) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = ActiveEventGroups.find(GroupName); + if (EventGroup != ActiveEventGroups.end() && + EventGroup->second.TargetRegion == Group.TargetRegion) + return false; + ActiveEventGroups.emplace(GroupName, Group); + return true; +} + +bool OmptEventGroupInterface::deprecateActiveEventGroup( + const std::string &GroupName) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = ActiveEventGroups.find(GroupName); + auto DeprecatedEventGroup = DeprecatedEventGroups.find(GroupName); + if (EventGroup == ActiveEventGroups.end() && + DeprecatedEventGroup != DeprecatedEventGroups.end()) + return false; + DeprecatedEventGroups.emplace(GroupName, EventGroup->second); + ActiveEventGroups.erase(GroupName); + return true; +} + +bool OmptEventGroupInterface::checkActiveEventGroups( + const std::string &GroupName, omptest::AssertEventGroup Group) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = ActiveEventGroups.find(GroupName); + return (EventGroup != ActiveEventGroups.end() && + EventGroup->second.TargetRegion == Group.TargetRegion); +} + +bool 
OmptEventGroupInterface::checkDeprecatedEventGroups( + const std::string &GroupName, omptest::AssertEventGroup Group) { + std::lock_guard Lock(GroupMutex); + auto EventGroup = DeprecatedEventGroups.find(GroupName); + return (EventGroup != DeprecatedEventGroups.end() && + EventGroup->second.TargetRegion == Group.TargetRegion); +} diff --git a/openmp/tools/omptest/src/OmptCallbackHandler.cpp b/openmp/tools/omptest/src/OmptCallbackHandler.cpp new file mode 100644 index 0000000000000..0794a1c27a902 --- /dev/null +++ b/openmp/tools/omptest/src/OmptCallbackHandler.cpp @@ -0,0 +1,445 @@ +//===- OmptCallbackHandler.cpp - OMPT Callback handling impl. ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the OMPT callback handling implementations. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptCallbackHandler.h" + +using namespace omptest; + +OmptCallbackHandler *Handler = nullptr; + +/// Returns the shared handler instance, allocating it on first use. +/// NOTE(review): the null check and allocation are not guarded by any lock +/// here -- confirm the first call cannot race. +OmptCallbackHandler &OmptCallbackHandler::get() { + if (Handler == nullptr) + Handler = new OmptCallbackHandler(); + + return *Handler; +} + +/// Appends Listener to the list of subscribers. +void OmptCallbackHandler::subscribe(OmptListener *Listener) { + Subscribers.push_back(Listener); +} + +/// Replays any recorded events to the current subscribers, then removes all +/// subscribers. +void OmptCallbackHandler::clearSubscribers() { + replay(); + + Subscribers.clear(); +} + +/// Delivers every recorded event to every subscriber; does nothing unless +/// RecordAndReplay is set. +void OmptCallbackHandler::replay() { + if (!RecordAndReplay) + return; + + // NOTE(review): E is cast to an rvalue once per subscriber. If any notify() + // implementation actually moves from its argument, later subscribers would + // see a moved-from event -- confirm listeners only read it. + for (auto &E : RecordedEvents) + for (const auto &S : Subscribers) + S->notify(std::move(E)); +} + +/// Thread-begin callback: records the event in record-and-replay mode, +/// otherwise notifies all subscribers (initial threads are skipped). +void OmptCallbackHandler::handleThreadBegin(ompt_thread_t ThreadType, + ompt_data_t *ThreadData) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ThreadBegin( + "Thread Begin", "", ObserveState::generated, ThreadType)); + return; + } + + // Initial thread event likely to precede assertion registration, so skip + if (ThreadType == ompt_thread_initial) + return; + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ThreadBegin( + "Thread Begin", "", ObserveState::generated, ThreadType)); +} + +/// Thread-end callback: records the event in record-and-replay mode, +/// otherwise notifies all subscribers. +void OmptCallbackHandler::handleThreadEnd(ompt_data_t *ThreadData) { + if (RecordAndReplay) { + recordEvent( + OmptAssertEvent::ThreadEnd("Thread End", "", ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify( + OmptAssertEvent::ThreadEnd("Thread End", "", ObserveState::generated)); +} + +/// Task-create callback: records the event in record-and-replay mode, +/// otherwise notifies all subscribers. +void OmptCallbackHandler::handleTaskCreate( + ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *NewTaskData, + int Flags, int HasDependences, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TaskCreate( + "Task Create", "", ObserveState::generated, EncounteringTaskData, + EncounteringTaskFrame, NewTaskData, Flags, HasDependences, CodeptrRA)); + return; + } + + for
(const auto &S : Subscribers) + S->notify(OmptAssertEvent::TaskCreate( + "Task Create", "", ObserveState::generated, EncounteringTaskData, + EncounteringTaskFrame, NewTaskData, Flags, HasDependences, CodeptrRA)); +} + +void OmptCallbackHandler::handleTaskSchedule(ompt_data_t *PriorTaskData, + ompt_task_status_t PriorTaskStatus, + ompt_data_t *NextTaskData) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TaskSchedule("Task Schedule", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TaskSchedule("Task Schedule", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleImplicitTask(ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, + unsigned int ActualParallelism, + unsigned int Index, int Flags) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ImplicitTask( + "Implicit Task", "", ObserveState::generated, Endpoint, ParallelData, + TaskData, ActualParallelism, Index, Flags)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ImplicitTask( + "Implicit Task", "", ObserveState::generated, Endpoint, ParallelData, + TaskData, ActualParallelism, Index, Flags)); +} + +void OmptCallbackHandler::handleParallelBegin( + ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *ParallelData, + unsigned int RequestedParallelism, int Flags, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ParallelBegin( + "Parallel Begin", "", ObserveState::generated, RequestedParallelism)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ParallelBegin( + "Parallel Begin", "", ObserveState::generated, RequestedParallelism)); +} + +void OmptCallbackHandler::handleParallelEnd(ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) { + if (RecordAndReplay) { + 
recordEvent(OmptAssertEvent::ParallelEnd("Parallel End", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ParallelEnd("Parallel End", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleDeviceInitialize( + int DeviceNum, const char *Type, ompt_device_t *Device, + ompt_function_lookup_t LookupFn, const char *DocumentationStr) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceInitialize( + "Device Inititalize", "", ObserveState::generated, DeviceNum, Type, + Device, LookupFn, DocumentationStr)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceInitialize( + "Device Inititalize", "", ObserveState::generated, DeviceNum, Type, + Device, LookupFn, DocumentationStr)); +} + +void OmptCallbackHandler::handleDeviceFinalize(int DeviceNum) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceFinalize( + "Device Finalize", "", ObserveState::generated, DeviceNum)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceFinalize( + "Device Finalize", "", ObserveState::generated, DeviceNum)); +} + +void OmptCallbackHandler::handleTarget(ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_id_t TargetId, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Target("Target", "", ObserveState::generated, + Kind, Endpoint, DeviceNum, TaskData, + TargetId, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Target("Target", "", ObserveState::generated, + Kind, Endpoint, DeviceNum, TaskData, + TargetId, CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetEmi(ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, + const void *CodeptrRA) { + if (RecordAndReplay) { + 
recordEvent(OmptAssertEvent::TargetEmi( + "Target EMI", "", ObserveState::generated, Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, TargetData, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetEmi( + "Target EMI", "", ObserveState::generated, Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, TargetData, CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetSubmit(ompt_id_t TargetId, + ompt_id_t HostOpId, + unsigned int RequestedNumTeams) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetSubmit("Target Submit", "", + ObserveState::generated, TargetId, + HostOpId, RequestedNumTeams)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetSubmit("Target Submit", "", + ObserveState::generated, TargetId, + HostOpId, RequestedNumTeams)); +} + +void OmptCallbackHandler::handleTargetSubmitEmi( + ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetData, + ompt_id_t *HostOpId, unsigned int RequestedNumTeams) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetSubmitEmi( + "Target Submit EMI", "", ObserveState::generated, Endpoint, TargetData, + HostOpId, RequestedNumTeams)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetSubmitEmi( + "Target Submit EMI", "", ObserveState::generated, Endpoint, TargetData, + HostOpId, RequestedNumTeams)); +} + +void OmptCallbackHandler::handleTargetDataOp( + ompt_id_t TargetId, ompt_id_t HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetDataOp( + "Target Data Op", "", ObserveState::generated, TargetId, HostOpId, + OpType, SrcAddr, SrcDeviceNum, DstAddr, DstDeviceNum, Bytes, + CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetDataOp( + "Target Data Op", "", 
ObserveState::generated, TargetId, HostOpId, + OpType, SrcAddr, SrcDeviceNum, DstAddr, DstDeviceNum, Bytes, + CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetDataOpEmi( + ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetDataOpEmi( + "Target Data Op EMI", "", ObserveState::generated, Endpoint, + TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetDataOpEmi( + "Target Data Op EMI", "", ObserveState::generated, Endpoint, + TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +void OmptCallbackHandler::handleDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceLoad( + "Device Load", "", ObserveState::generated, DeviceNum, Filename, + OffsetInFile, VmaInFile, Bytes, HostAddr, DeviceAddr, ModuleId)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceLoad( + "Device Load", "", ObserveState::generated, DeviceNum, Filename, + OffsetInFile, VmaInFile, Bytes, HostAddr, DeviceAddr, ModuleId)); +} + +void OmptCallbackHandler::handleDeviceUnload(int DeviceNum, uint64_t ModuleId) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceUnload("Device Unload", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceUnload("Device Unload", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleBufferRequest(int 
DeviceNum, + ompt_buffer_t **Buffer, + size_t *Bytes) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRequest("Buffer Request", "", + ObserveState::generated, + DeviceNum, Buffer, Bytes)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRequest("Buffer Request", "", + ObserveState::generated, DeviceNum, + Buffer, Bytes)); +} + +void OmptCallbackHandler::handleBufferComplete(int DeviceNum, + ompt_buffer_t *Buffer, + size_t Bytes, + ompt_buffer_cursor_t Begin, + int BufferOwned) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferComplete( + "Buffer Complete", "", ObserveState::generated, DeviceNum, Buffer, + Bytes, Begin, BufferOwned)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferComplete( + "Buffer Complete", "", ObserveState::generated, DeviceNum, Buffer, + Bytes, Begin, BufferOwned)); +} + +void OmptCallbackHandler::handleBufferRecord(ompt_record_ompt_t *Record) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRecord("Buffer Record", "", + ObserveState::generated, Record)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRecord("Buffer Record", "", + ObserveState::generated, Record)); +} + +void OmptCallbackHandler::handleBufferRecordDeallocation( + ompt_buffer_t *Buffer) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRecordDeallocation( + "Buffer Deallocation", "", ObserveState::generated, Buffer)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRecordDeallocation( + "Buffer Deallocation", "", ObserveState::generated, Buffer)); +} + +void OmptCallbackHandler::handleWork(ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Work("Work", "", ObserveState::generated, + WorkType, Endpoint, 
ParallelData, + TaskData, Count, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Work("Work", "", ObserveState::generated, + WorkType, Endpoint, ParallelData, TaskData, + Count, CodeptrRA)); +} + +void OmptCallbackHandler::handleSyncRegion(ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::SyncRegion( + "SyncRegion", "", ObserveState::generated, Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::SyncRegion( + "SyncRegion", "", ObserveState::generated, Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); +} + +void OmptCallbackHandler::handleDispatch(ompt_data_t *ParallelData, + ompt_data_t *TaskData, + ompt_dispatch_t Kind, + ompt_data_t Instance) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Dispatch("Dispatch", "", + ObserveState::generated, ParallelData, + TaskData, Kind, Instance)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Dispatch("Dispatch", "", ObserveState::generated, + ParallelData, TaskData, Kind, + Instance)); +} + +void OmptCallbackHandler::handleAssertionSyncPoint( + const std::string &SyncPointName) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::AssertionSyncPoint( + "Assertion SyncPoint", "", ObserveState::generated, SyncPointName)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::AssertionSyncPoint( + "Assertion SyncPoint", "", ObserveState::generated, SyncPointName)); +} + +void OmptCallbackHandler::recordEvent(OmptAssertEvent &&Event) { + RecordedEvents.emplace_back(std::forward(Event)); +} diff --git a/openmp/tools/omptest/src/OmptTester.cpp b/openmp/tools/omptest/src/OmptTester.cpp new file mode 100644 index 0000000000000..22de91046fbdc --- /dev/null +++ 
b/openmp/tools/omptest/src/OmptTester.cpp @@ -0,0 +1,504 @@ +//===- OmptTester.cpp - ompTest OMPT tool implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the core implementation file for the ompTest library. +/// It provides the actual OMPT tool implementation: registers callbacks, etc. +/// OMPT callbacks are passed to their corresponding handler, which in turn +/// notifies all registered asserters. +/// +//===----------------------------------------------------------------------===// + +#include "OmptTester.h" + +#include +#include +#include +#include + +using namespace omptest; + +// Callback handler, which receives and relays OMPT callbacks +extern OmptCallbackHandler *Handler; + +// EventListener, which will actually print the OMPT events +static OmptEventReporter *EventReporter; + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +#define OMPT_BUFFER_REQUEST_SIZE 256 + +#ifdef OPENMP_LIBOMPTEST_BUILD_STANDALONE +std::map TestRegistrar::Tests; +#endif + +static std::atomic NextOpId{0x8000000000000001}; +static bool UseEMICallbacks = false; +static bool UseTracing = false; +static bool RunAsTestSuite = false; +static bool ColoredLog = false; + +// OMPT entry point handles +static ompt_set_trace_ompt_t ompt_set_trace_ompt = 0; +static ompt_start_trace_t ompt_start_trace = 0; +static ompt_flush_trace_t ompt_flush_trace = 0; +static 
ompt_stop_trace_t ompt_stop_trace = 0; +static ompt_get_record_ompt_t ompt_get_record_ompt = 0; +static ompt_advance_buffer_cursor_t ompt_advance_buffer_cursor = 0; +static ompt_get_record_type_t ompt_get_record_type_fn = 0; + +// OMPT device side tracing: Currently traced devices +typedef std::unordered_set OmptDeviceSetTy; +typedef std::unique_ptr OmptDeviceSetPtrTy; +static OmptDeviceSetPtrTy TracedDevices; + +// OMPT callbacks + +// Trace record callbacks +static void on_ompt_callback_buffer_request(int device_num, + ompt_buffer_t **buffer, + size_t *bytes) { + *bytes = OMPT_BUFFER_REQUEST_SIZE; + *buffer = malloc(*bytes); + OmptCallbackHandler::get().handleBufferRequest(device_num, buffer, bytes); +} + +// Note: This callback must handle a null begin cursor. Currently, +// ompt_get_record_ompt, print_record_ompt, and +// ompt_advance_buffer_cursor handle a null cursor. +static void on_ompt_callback_buffer_complete( + int device_num, ompt_buffer_t *buffer, + size_t bytes, /* bytes returned in this callback */ + ompt_buffer_cursor_t begin, int buffer_owned) { + OmptCallbackHandler::get().handleBufferComplete(device_num, buffer, bytes, + begin, buffer_owned); + + int Status = 1; + ompt_buffer_cursor_t CurrentPos = begin; + while (Status) { + ompt_record_ompt_t *Record = ompt_get_record_ompt(buffer, CurrentPos); + if (ompt_get_record_type_fn(buffer, CurrentPos) != ompt_record_ompt) { + printf("WARNING: received non-ompt type buffer object\n"); + } + // TODO: Sometimes it may happen that the retrieved record may be null?! 
+ // Only handle non-null records + if (Record != nullptr) + OmptCallbackHandler::get().handleBufferRecord(Record); + Status = ompt_advance_buffer_cursor(/*device=*/NULL, buffer, bytes, + CurrentPos, &CurrentPos); + } + if (buffer_owned) { + OmptCallbackHandler::get().handleBufferRecordDeallocation(buffer); + free(buffer); + } +} + +static ompt_set_result_t set_trace_ompt(ompt_device_t *Device) { + if (!ompt_set_trace_ompt) + return ompt_set_error; + + if (UseEMICallbacks) { + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_emi); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_data_op_emi); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_submit_emi); + } else { + ompt_set_trace_ompt(Device, /*enable=*/1, /*etype=*/ompt_callback_target); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_data_op); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_submit); + } + + return ompt_set_always; +} + +/////// HOST-RELATED ////// + +static void on_ompt_callback_thread_begin(ompt_thread_t thread_type, + ompt_data_t *thread_data) { + OmptCallbackHandler::get().handleThreadBegin(thread_type, thread_data); +} + +static void on_ompt_callback_thread_end(ompt_data_t *thread_data) { + OmptCallbackHandler::get().handleThreadEnd(thread_data); +} + +static void on_ompt_callback_parallel_begin( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, + unsigned int requested_parallelism, int flags, const void *codeptr_ra) { + OmptCallbackHandler::get().handleParallelBegin( + encountering_task_data, encountering_task_frame, parallel_data, + requested_parallelism, flags, codeptr_ra); +} + +static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + int flags, const void *codeptr_ra) { + OmptCallbackHandler::get().handleParallelEnd( + parallel_data, 
encountering_task_data, flags, codeptr_ra); +} + +static void +on_ompt_callback_task_create(ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *new_task_data, int flags, + int has_dependences, const void *codeptr_ra) { + OmptCallbackHandler::get().handleTaskCreate( + encountering_task_data, encountering_task_frame, new_task_data, flags, + has_dependences, codeptr_ra); +} + +static void on_ompt_callback_task_schedule(ompt_data_t *prior_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *next_task_data) { + OmptCallbackHandler::get().handleTaskSchedule( + prior_task_data, prior_task_status, next_task_data); +} + +static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int actual_parallelism, + unsigned int index, int flags) { + OmptCallbackHandler::get().handleImplicitTask( + endpoint, parallel_data, task_data, actual_parallelism, index, flags); +} + +// Callbacks as of Table 19.4, which are not considered required for a minimal +// conforming OMPT implementation. 
+static void on_ompt_callback_work(ompt_work_t work_type, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, uint64_t count, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleWork(work_type, endpoint, parallel_data, + task_data, count, codeptr_ra); +} + +static void on_ompt_callback_dispatch(ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_dispatch_t kind, + ompt_data_t instance) { + OmptCallbackHandler::get().handleDispatch(parallel_data, task_data, kind, + instance); +} + +static void on_ompt_callback_sync_region(ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleSyncRegion(kind, endpoint, parallel_data, + task_data, codeptr_ra); +} + +/////// DEVICE-RELATED ////// + +// Synchronous callbacks +static void on_ompt_callback_device_initialize(int device_num, const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation) { + OmptCallbackHandler::get().handleDeviceInitialize(device_num, type, device, + lookup, documentation); + if (!UseTracing) + return; + + if (!lookup) { + printf("Trace collection disabled on device %d\n", device_num); + return; + } + + ompt_set_trace_ompt = (ompt_set_trace_ompt_t)lookup("ompt_set_trace_ompt"); + ompt_start_trace = (ompt_start_trace_t)lookup("ompt_start_trace"); + ompt_flush_trace = (ompt_flush_trace_t)lookup("ompt_flush_trace"); + ompt_stop_trace = (ompt_stop_trace_t)lookup("ompt_stop_trace"); + ompt_get_record_ompt = (ompt_get_record_ompt_t)lookup("ompt_get_record_ompt"); + ompt_advance_buffer_cursor = + (ompt_advance_buffer_cursor_t)lookup("ompt_advance_buffer_cursor"); + + ompt_get_record_type_fn = + (ompt_get_record_type_t)lookup("ompt_get_record_type"); + if (!ompt_get_record_type_fn) { + printf("WARNING: No function ompt_get_record_type found in device " + "callbacks\n"); + } + + static bool 
IsDeviceMapInitialized = false; + if (!IsDeviceMapInitialized) { + TracedDevices = std::make_unique(); + IsDeviceMapInitialized = true; + } + + set_trace_ompt(device); + + // In many scenarios, this will be a good place to start the + // trace. If start_trace is called from the main program before this + // callback is dispatched, the start_trace handle will be null. This + // is because this device_init callback is invoked during the first + // target construct implementation. + + start_trace(device); +} + +static void on_ompt_callback_device_finalize(int device_num) { + OmptCallbackHandler::get().handleDeviceFinalize(device_num); +} + +static void on_ompt_callback_device_load(int device_num, const char *filename, + int64_t offset_in_file, + void *vma_in_file, size_t bytes, + void *host_addr, void *device_addr, + uint64_t module_id) { + OmptCallbackHandler::get().handleDeviceLoad( + device_num, filename, offset_in_file, vma_in_file, bytes, host_addr, + device_addr, module_id); +} + +static void on_ompt_callback_device_unload(int device_num, uint64_t module_id) { + OmptCallbackHandler::get().handleDeviceUnload(device_num, module_id); +} + +static void on_ompt_callback_target_data_op( + ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, + void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, + size_t bytes, const void *codeptr_ra) { + OmptCallbackHandler::get().handleTargetDataOp( + target_id, host_op_id, optype, src_addr, src_device_num, dest_addr, + dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleTarget(kind, endpoint, device_num, task_data, + target_id, codeptr_ra); +} + +static void on_ompt_callback_target_submit(ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams) { + 
OmptCallbackHandler::get().handleTargetSubmit(target_id, host_op_id, + requested_num_teams); +} + +static void on_ompt_callback_target_data_op_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, + ompt_data_t *target_data, ompt_id_t *host_op_id, + ompt_target_data_op_t optype, void *src_addr, int src_device_num, + void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + // Both src and dest must not be null + // However, for omp_target_alloc only the END call holds a value for one of + // the two entries + if (optype != ompt_target_data_alloc) + assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); + if (endpoint == ompt_scope_begin) + *host_op_id = NextOpId.fetch_add(1, std::memory_order_relaxed); + OmptCallbackHandler::get().handleTargetDataOpEmi( + endpoint, target_task_data, target_data, host_op_id, optype, src_addr, + src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target_emi(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_data_t *target_task_data, + ompt_data_t *target_data, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + if (endpoint == ompt_scope_begin) + target_data->value = NextOpId.fetch_add(1, std::memory_order_relaxed); + OmptCallbackHandler::get().handleTargetEmi(kind, endpoint, device_num, + task_data, target_task_data, + target_data, codeptr_ra); +} + +static void on_ompt_callback_target_submit_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_data, + ompt_id_t *host_op_id, unsigned int requested_num_teams) { + OmptCallbackHandler::get().handleTargetSubmitEmi( + endpoint, target_data, host_op_id, requested_num_teams); +} + +static void on_ompt_callback_target_map(ompt_id_t target_id, + unsigned int nitems, void **host_addr, + void **device_addr, size_t *bytes, + unsigned int 
*mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map callback is unimplemented"); +} + +static void on_ompt_callback_target_map_emi(ompt_data_t *target_data, + unsigned int nitems, + void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map emi callback is unimplemented"); +} + +/// Load the value of a given boolean environmental variable. +bool getBoolEnvironmentVariable(const char *VariableName) { + if (VariableName == nullptr) + return false; + if (const char *EnvValue = std::getenv(VariableName)) { + std::string S{EnvValue}; + for (auto &C : S) + C = (char)std::tolower(C); + if (S == "1" || S == "on" || S == "true" || S == "yes") + return true; + } + return false; +} + +/// Called by the OMP runtime to initialize the OMPT +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback_t ompt_set_callback = nullptr; + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + if (!ompt_set_callback) + return 0; // failure + + UseEMICallbacks = getBoolEnvironmentVariable("OMPTEST_USE_OMPT_EMI"); + UseTracing = getBoolEnvironmentVariable("OMPTEST_USE_OMPT_TRACING"); + RunAsTestSuite = getBoolEnvironmentVariable("OMPTEST_RUN_AS_TESTSUITE"); + ColoredLog = getBoolEnvironmentVariable("OMPTEST_LOG_COLORED"); + + register_ompt_callback(ompt_callback_thread_begin); + register_ompt_callback(ompt_callback_thread_end); + register_ompt_callback(ompt_callback_parallel_begin); + register_ompt_callback(ompt_callback_parallel_end); + register_ompt_callback(ompt_callback_work); + // register_ompt_callback(ompt_callback_dispatch); + register_ompt_callback(ompt_callback_task_create); + // register_ompt_callback(ompt_callback_dependences); + // register_ompt_callback(ompt_callback_task_dependence); + register_ompt_callback(ompt_callback_task_schedule); + register_ompt_callback(ompt_callback_implicit_task); + // 
register_ompt_callback(ompt_callback_masked); + register_ompt_callback(ompt_callback_sync_region); + // register_ompt_callback(ompt_callback_mutex_acquire); + // register_ompt_callback(ompt_callback_mutex); + // register_ompt_callback(ompt_callback_nestLock); + // register_ompt_callback(ompt_callback_flush); + // register_ompt_callback(ompt_callback_cancel); + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_device_unload); + + if (UseEMICallbacks) { + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit_emi); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_map_emi); + } else { + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target_map); + } + + // Construct & subscribe the reporter, so it will be notified of events + EventReporter = new OmptEventReporter(); + OmptCallbackHandler::get().subscribe(EventReporter); + + if (RunAsTestSuite) + EventReporter->setActive(false); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) { + assert(Handler && "Callback handler should be present at this point"); + assert(EventReporter && "EventReporter should be present at this point"); + delete Handler; + delete EventReporter; +} + +#ifdef __cplusplus +extern "C" { +#endif +/// Called from the OMP Runtime to start / initialize the tool +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = { + &ompt_initialize, &ompt_finalize, {0}}; + return &ompt_start_tool_result; +} + +int start_trace(ompt_device_t *Device) { + if (!ompt_start_trace) + return 0; 
+ + // This device will be traced + assert(TracedDevices->find(Device) == TracedDevices->end() && + "Device already present in the map"); + TracedDevices->insert(Device); + + return ompt_start_trace(Device, &on_ompt_callback_buffer_request, + &on_ompt_callback_buffer_complete); +} + +int flush_trace(ompt_device_t *Device) { + if (!ompt_flush_trace) + return 0; + return ompt_flush_trace(Device); +} + +int flush_traced_devices() { + if (!ompt_flush_trace || TracedDevices == nullptr) + return 0; + + size_t NumFlushedDevices = 0; + for (auto Device : *TracedDevices) + if (ompt_flush_trace(Device) == 1) + ++NumFlushedDevices; + + // Provide time to process triggered assert events + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + return (NumFlushedDevices == TracedDevices->size()); +} + +int stop_trace(ompt_device_t *Device) { + if (!ompt_stop_trace) + return 0; + + // This device will not be traced anymore + assert(TracedDevices->find(Device) != TracedDevices->end() && + "Device not present in the map"); + TracedDevices->erase(Device); + + return ompt_stop_trace(Device); +} + +// This is primarily used to stop unwanted prints from happening. +void libomptest_global_eventreporter_set_active(bool State) { + assert(EventReporter && "EventReporter should be present at this point"); + EventReporter->setActive(State); +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/tools/omptest/src/OmptTesterStandalone.cpp b/openmp/tools/omptest/src/OmptTesterStandalone.cpp new file mode 100644 index 0000000000000..d4f68b4576536 --- /dev/null +++ b/openmp/tools/omptest/src/OmptTesterStandalone.cpp @@ -0,0 +1,147 @@ +//===- OmptTesterStandalone.cpp - Standalone unit testing impl. -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the 'standalone' ompTest unit testing core +/// implementation, defining the general test suite and test case execution. +/// +//===----------------------------------------------------------------------===// + +#include "OmptTesterStandalone.h" +#include "OmptCallbackHandler.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace omptest; + +Error TestCase::exec() { + Error E; + E.Fail = false; + + if (IsDisabled) + return E; + + OmptCallbackHandler::get().subscribe(SequenceAsserter.get()); + OmptCallbackHandler::get().subscribe(SetAsserter.get()); + OmptCallbackHandler::get().subscribe(EventReporter.get()); + + execImpl(); + + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // We remove subscribers to not be notified of events after our test case + // finished. 
+ OmptCallbackHandler::get().clearSubscribers(); + omptest::AssertState SequenceResultState = SequenceAsserter->checkState(); + omptest::AssertState SetResultState = SetAsserter->checkState(); + bool AnyFail = SequenceResultState == omptest::AssertState::fail || + SetResultState == omptest::AssertState::fail; + bool AllPass = SequenceResultState == omptest::AssertState::pass && + SetResultState == omptest::AssertState::pass; + if (ExpectedState == omptest::AssertState::pass && AnyFail) + E.Fail = true; + else if (ExpectedState == omptest::AssertState::fail && AllPass) + E.Fail = true; + if (AnyFail) + ResultState = omptest::AssertState::fail; + return E; +} + +TestSuite::TestSuite(TestSuite &&O) { + Name = O.Name; + TestCases.swap(O.TestCases); +} + +void TestSuite::setup() {} + +void TestSuite::teardown() {} + +TestSuite::TestCaseVec::iterator TestSuite::begin() { + return TestCases.begin(); +} + +TestSuite::TestCaseVec::iterator TestSuite::end() { return TestCases.end(); } + +TestRegistrar &TestRegistrar::get() { + static TestRegistrar TR; + return TR; +} + +std::vector TestRegistrar::getTestSuites() { + std::vector TSs; + for (auto &[k, v] : Tests) + TSs.emplace_back(std::move(v)); + return TSs; +} + +void TestRegistrar::addCaseToSuite(TestCase *TC, std::string TSName) { + auto &TS = Tests[TSName]; + if (TS.Name.empty()) + TS.Name = TSName; + TS.TestCases.emplace_back(TC); +} + +Registerer::Registerer(TestCase *TC, const std::string SuiteName) { + std::cout << "Adding " << TC->Name << " to " << SuiteName << std::endl; + TestRegistrar::get().addCaseToSuite(TC, SuiteName); +} + +int Runner::run() { + int ErrorCount = 0; + for (auto &TS : TestSuites) { + std::cout << "\n======\nExecuting for " << TS.Name << std::endl; + TS.setup(); + for (auto &TC : TS) { + std::cout << "\nExecuting " << TC->Name << std::endl; + if (Error Err = TC->exec()) { + reportError(Err); + abortOrKeepGoing(); + ++ErrorCount; + } + } + TS.teardown(); + } + printSummary(); + return ErrorCount; 
+} + +void Runner::reportError(const Error &Err) {} + +void Runner::abortOrKeepGoing() {} + +void Runner::printSummary() { + std::cout << "\n====== SUMMARY\n"; + for (auto &TS : TestSuites) { + std::cout << " - " << TS.Name; + for (auto &TC : TS) { + std::string Result; + if (TC->IsDisabled) { + Result = "-#-#-"; + } else if (TC->ResultState == TC->ExpectedState) { + if (TC->ResultState == omptest::AssertState::pass) + Result = "PASS"; + else if (TC->ResultState == omptest::AssertState::fail) + Result = "XFAIL"; + } else { + if (TC->ResultState == omptest::AssertState::fail) + Result = "FAIL"; + else if (TC->ResultState == omptest::AssertState::pass) + Result = "UPASS"; + } + std::cout << "\n " << std::setw(5) << Result << " : " << TC->Name; + } + std::cout << std::endl; + } +} diff --git a/openmp/tools/omptest/test/CMakeLists.txt b/openmp/tools/omptest/test/CMakeLists.txt new file mode 100644 index 0000000000000..427893313cc67 --- /dev/null +++ b/openmp/tools/omptest/test/CMakeLists.txt @@ -0,0 +1,28 @@ +##===----------------------------------------------------------------------===## +# +# Add ompTest unit tests to check-openmp. +# +##===----------------------------------------------------------------------===## + +# Target: ompTest library unit tests +file(GLOB UNITTEST_SOURCES "unittests/*.cpp") +add_executable(omptest-unittests ${UNITTEST_SOURCES}) + +# Add local and LLVM-provided GoogleTest include directories. 
+target_include_directories(omptest-unittests PRIVATE + ../include + ${LLVM_THIRD_PARTY_DIR}/unittest/googletest/include) + +target_link_libraries(omptest-unittests PRIVATE omptest) + +set_target_properties(omptest-unittests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +# Add ompTest unit tests to check-openmp +add_openmp_testsuite(check-ompt-omptest "Running OMPT ompTest unit tests" + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omptest-unittests) + +# Configure the lit.site.cfg.in file +set(AUTO_GEN_COMMENT "## Autogenerated by OPENMP_TOOLS_OMPTEST_TEST " + "configuration.\n# Do not edit!") +configure_file(lit.site.cfg.in lit.site.cfg @ONLY) diff --git a/openmp/tools/omptest/test/lit.cfg b/openmp/tools/omptest/test/lit.cfg new file mode 100644 index 0000000000000..69c401aed83b8 --- /dev/null +++ b/openmp/tools/omptest/test/lit.cfg @@ -0,0 +1,26 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +# name: The name of this test suite. +config.name = 'OMPT ompTest' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = [''] + +# test_source_root: The root path where tests are located. 
+config.test_source_root = config.test_obj_root + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.test_obj_root + +# test format, match (omptest-)unittests +# Matched binaries (GoogleTests) are executed +config.test_format = lit.formats.GoogleTest(".", "unittests") diff --git a/openmp/tools/omptest/test/lit.site.cfg.in b/openmp/tools/omptest/test/lit.site.cfg.in new file mode 100644 index 0000000000000..4fa8c7e349681 --- /dev/null +++ b/openmp/tools/omptest/test/lit.site.cfg.in @@ -0,0 +1,9 @@ + at AUTO_GEN_COMMENT@ + +config.test_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config do the real work. +lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp b/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp new file mode 100644 index 0000000000000..34ceb7597b791 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp @@ -0,0 +1,358 @@ +#include "OmptAliases.h" +#include "OmptAsserter.h" +#include +#include + +#include "gtest/gtest.h" + +using namespace omptest; +using OAE = omptest::OmptAssertEvent; +using OS = omptest::ObserveState; + +/// SequencedAsserter test-fixture class to avoid code duplication among tests. 
+class OmptSequencedAsserterTest : public testing::Test { +protected: + OmptSequencedAsserterTest() { + // Construct default sequenced asserter + SeqAsserter = std::make_unique(); + + // Silence all potential log prints + SeqAsserter->getLog()->setLoggingLevel(logging::Level::SILENT); + } + + std::unique_ptr SeqAsserter; +}; + +TEST_F(OmptSequencedAsserterTest, DefaultState) { + // Assertion should neither start as 'deactivated' nor 'suspended' + ASSERT_EQ(SeqAsserter->isActive(), true); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + + // Assertion should begin with event ID zero + ASSERT_EQ(SeqAsserter->NextEvent, 0); + + // Assertion should begin without previous notifications or assertions + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + + // There should be no expected events + ASSERT_EQ(SeqAsserter->Events.empty(), true); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + + // Default mode should be strict + ASSERT_NE(SeqAsserter->getOperationMode(), AssertMode::relaxed); + ASSERT_EQ(SeqAsserter->getOperationMode(), AssertMode::strict); + + // Default state should be passing + ASSERT_NE(SeqAsserter->getState(), AssertState::fail); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); + ASSERT_NE(SeqAsserter->checkState(), AssertState::fail); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, IgnoreNotificationsWhenEmpty) { + // ParallelBegin events are suppressed by default + auto SuppressedEvent = OAE::ParallelBegin( + /*Name=*/"ParBegin", /*Group=*/"", /*Expected=*/OS::always, + /*NumThreads=*/3); + + // DeviceFinalize events are not ignored by default + auto IgnoredEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + + // Situation: There is nothing to assert. + // Result: All notifications are ignored. 
+ // Hence, check that the perceived count of notifications remains unchanged + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + SeqAsserter->notify(std::move(SuppressedEvent)); + + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + SeqAsserter->notify(std::move(IgnoredEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, IgnoreNotificationsWhileDeactivated) { + auto ExpectedEvent = OAE::DeviceUnload( + /*Name=*/"DevUnload", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Deactivate asserter, effectively ignoring notifications + SeqAsserter->setActive(false); + ASSERT_EQ(SeqAsserter->isActive(), false); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // DeviceFinalize events are not ignored by default + auto IgnoredEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->notify(std::move(IgnoredEvent)); + + // Assertion was deactivated: No change + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + + SeqAsserter->setActive(true); + ASSERT_EQ(SeqAsserter->isActive(), true); + + auto ObservedEvent = OAE::DeviceUnload( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->notify(std::move(ObservedEvent)); + + // Assertion was activated, one notification expected + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEvent) { + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + 
/*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // Sanity check: Notifications should not be triggered + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + // Adding an expected event must change the event count but not the state + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventIgnoreSuppressed) { + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // ParallelBegin events are suppressed by default + auto SuppressedEvent = OAE::ParallelBegin( + /*Name=*/"ParBegin", /*Group=*/"", /*Expected=*/OS::always, + /*NumThreads=*/3); + // Situation: There is one expected event and ParallelBegins are suppressed. + // Notification count remains unchanged for suppressed events + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + SeqAsserter->notify(std::move(SuppressedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObservePass) { + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObserveFail) { + auto ExpectedEvent = 
OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + // Provide wrong DeviceNum + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/23); + + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + // Observed and expected event do not match: Fail + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObserveDifferentType) { + auto ExpectedEvent = OAE::DeviceUnload( + /*Name=*/"DevUnload", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + // Observed and expected event do not match: Fail + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckTargetGroupNoEffect) { + // Situation: Groups are designed to be used as an indicator -WITHIN- target + // regions. Hence, comparing two target regions w.r.t. their groups has no + // effect on pass or fail. 
+ + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Deactivate asserter, effectively ignoring notifications + SeqAsserter->setActive(false); + ASSERT_EQ(SeqAsserter->isActive(), false); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + + // Assertion was deactivated: No change + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Re-activate asserter + SeqAsserter->setActive(true); + ASSERT_EQ(SeqAsserter->isActive(), true); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // Actually observe a target event from "AnotherGroup" + auto AnotherObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"AnotherGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(AnotherObservedEvent)); + + // Observed all expected events; groups of target regions do not affect pass + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, CheckSyncPoint) { + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", 
/*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + SeqAsserter->notify(OAE::AssertionSyncPoint( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::always, + /*SyncPointName=*/"SyncPoint 1")); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + + // All events processed: SyncPoint "passes" + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + auto AnotherExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + SeqAsserter->insert(std::move(AnotherExpectedEvent)); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Remaining events present: SyncPoint "fails" + SeqAsserter->notify(OAE::AssertionSyncPoint( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::always, + /*SyncPointName=*/"SyncPoint 2")); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckExcessNotify) { + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + 
/*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + // All events processed: pass + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + // Target events are not ignored by default + auto AnotherObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + + // No more events expected: notify "fails" + SeqAsserter->notify(std::move(AnotherObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckSuspend) { + SeqAsserter->insert(OAE::AssertionSuspend( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::never)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Being notified while the next expected event is a "suspend" should change + // the asserter's state + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + SeqAsserter->notify(OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7)); + ASSERT_EQ(SeqAsserter->AssertionSuspended, true); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", 
/*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + + // Being notified with an observed event, which matches the next expected + // event, resumes assertion (suspended = false) + ASSERT_EQ(SeqAsserter->AssertionSuspended, true); + SeqAsserter->notify(OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr)); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} diff --git a/openmp/tools/omptest/test/unittests/internal-event-test.cpp b/openmp/tools/omptest/test/unittests/internal-event-test.cpp new file mode 100644 index 0000000000000..a53025460c7e0 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/internal-event-test.cpp @@ -0,0 +1,530 @@ +#include "InternalEvent.h" +#include +#include + +#include "gtest/gtest.h" + +using namespace omptest; + +TEST(InternalEvent_toString, AssertionSyncPoint) { + internal::AssertionSyncPoint SP{/*Name=*/"Test Sync Point"}; + + EXPECT_EQ(SP.toString(), "Assertion SyncPoint: 'Test Sync Point'"); +} + +TEST(InternalEvent_toString, ThreadBegin) { + internal::ThreadBegin TB{/*ThreadType=*/ompt_thread_t::ompt_thread_initial}; + + EXPECT_EQ(TB.toString(), "OMPT Callback ThreadBegin: ThreadType=1"); +} + +TEST(InternalEvent_toString, ThreadEnd) { + internal::ThreadEnd TE{}; + + EXPECT_EQ(TE.toString(), "OMPT Callback ThreadEnd"); +} + +TEST(InternalEvent_toString, ParallelBegin) { + internal::ParallelBegin PB{/*NumThreads=*/31}; + + EXPECT_EQ(PB.toString(), "OMPT Callback ParallelBegin: NumThreads=31"); +} + +TEST(InternalEvent_toString, 
ParallelEnd) { + internal::ParallelEnd PE{/*ParallelData=*/(ompt_data_t *)0x11, + /*EncounteringTaskData=*/(ompt_data_t *)0x22, + /*Flags=*/31, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(PE.toString(), "OMPT Callback ParallelEnd"); +} + +TEST(InternalEvent_toString, Work) { + internal::Work WK{/*WorkType=*/ompt_work_t::ompt_work_loop_dynamic, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_beginend, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Count=*/31, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(WK.toString(), + "OMPT Callback Work: work_type=11 endpoint=3 parallel_data=0x11 " + "task_data=0x22 count=31 codeptr=0x33"); +} + +TEST(InternalEvent_toString, Dispatch_iteration) { + ompt_data_t DI{.value = 31}; + internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_iteration, + /*Instance=*/DI}; + + EXPECT_EQ(D.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=1 instance=[it=31]"); +} + +TEST(InternalEvent_toString, Dispatch_section) { + ompt_data_t DI{.ptr = (void *)0x33}; + internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_section, + /*Instance=*/DI}; + + EXPECT_EQ(D.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=2 instance=[ptr=0x33]"); +} + +TEST(InternalEvent_toString, Dispatch_chunks) { + ompt_dispatch_chunk_t DC{.start = 7, .iterations = 31}; + ompt_data_t DI{.ptr = (void *)&DC}; + + internal::Dispatch DLoop{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_ws_loop_chunk, + /*Instance=*/DI}; + + internal::Dispatch DTask{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_taskloop_chunk, + /*Instance=*/DI}; + + internal::Dispatch DDist{ + 
/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_distribute_chunk, + /*Instance=*/DI}; + + ompt_data_t DINull{.ptr = nullptr}; + internal::Dispatch DDistNull{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_distribute_chunk, + /*Instance=*/DINull}; + + EXPECT_EQ(DLoop.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=3 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DTask.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=4 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DDist.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=5 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DDistNull.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=5"); +} + +TEST(InternalEvent_toString, TaskCreate) { + internal::TaskCreate TC{/*EncounteringTaskData=*/(ompt_data_t *)0x11, + /*EncounteringTaskFrame=*/(const ompt_frame_t *)0x22, + /*NewTaskData=*/(ompt_data_t *)0x33, + /*Flags=*/7, + /*HasDependences=*/31, + /*CodeptrRA=*/(const void *)0x44}; + + EXPECT_EQ(TC.toString(), + "OMPT Callback TaskCreate: encountering_task_data=0x11 " + "encountering_task_frame=0x22 new_task_data=0x33 flags=7 " + "has_dependences=31 codeptr=0x44"); +} + +TEST(InternalEvent_toString, ImplicitTask) { + internal::ImplicitTask IT{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*ActualParallelism=*/7, + /*Index=*/31, + /*Flags=*/127}; + + EXPECT_EQ(IT.toString(), + "OMPT Callback ImplicitTask: endpoint=1 parallel_data=0x11 " + "task_data=0x22 actual_parallelism=7 index=31 flags=127"); +} + +TEST(InternalEvent_toString, SyncRegion) { + internal::SyncRegion SR{ + /*Kind=*/ompt_sync_region_t::ompt_sync_region_taskwait, + 
/*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_end, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(SR.toString(), "OMPT Callback SyncRegion: kind=5 endpoint=2 " + "parallel_data=0x11 task_data=0x22 codeptr=0x33"); +} + +TEST(InternalEvent_toString, Target) { + internal::Target T{/*Kind=*/ompt_target_t::ompt_target_enter_data_nowait, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_end, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)0x11, + /*TargetId=*/(ompt_id_t)31, + /*CodeptrRA=*/(const void *)0x22}; + + EXPECT_EQ(T.toString(), "Callback Target: target_id=31 kind=10 " + "endpoint=2 device_num=7 code=0x22"); +} + +TEST(InternalEvent_toString, TargetEmi) { + ompt_data_t TaskData{.value = 31}; + ompt_data_t TargetTaskData{.value = 127}; + ompt_data_t TargetData{.value = 8191}; + + internal::TargetEmi T{/*Kind=*/ompt_target_t::ompt_target_update, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)&TaskData, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*CodeptrRA=*/(const void *)0x11}; + + internal::TargetEmi TDataNull{ + /*Kind=*/ompt_target_t::ompt_target_update, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)&TaskData, + /*TargetTaskData=*/(ompt_data_t *)nullptr, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*CodeptrRA=*/(const void *)0x11}; + + std::ostringstream StreamT1; + std::ostringstream StreamT2; + std::string CallBackPrefix{ + "Callback Target EMI: kind=4 endpoint=1 device_num=7"}; + StreamT1 << CallBackPrefix << std::showbase << std::hex; + StreamT1 << " task_data=" << &TaskData << " (0x1f)"; + StreamT1 << " target_task_data=" << &TargetTaskData << " (0x7f)"; + StreamT1 << " target_data=" << &TargetData << " (0x1fff)"; + StreamT1 << " code=0x11"; + + StreamT2 << CallBackPrefix << std::showbase << std::hex; 
+ StreamT2 << " task_data=" << &TaskData << " (0x1f)"; + StreamT2 << " target_task_data=(nil) (0x0)"; + StreamT2 << " target_data=" << &TargetData << " (0x1fff)"; + StreamT2 << " code=0x11"; + + EXPECT_EQ(T.toString(), StreamT1.str()); + EXPECT_EQ(TDataNull.toString(), StreamT2.str()); +} + +TEST(InternalEvent_toString, TargetDataOp) { + internal::TargetDataOp TDO{ + /*TargetId=*/7, + /*HostOpId=*/31, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_associate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/127, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/8191, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ( + TDO.toString(), + " Callback DataOp: target_id=7 host_op_id=31 optype=5 src=0x11 " + "src_device_num=127 dest=0x22 dest_device_num=8191 bytes=4096 code=0x33"); +} + +TEST(InternalEvent_toString, TargetDataOpEmi) { + ompt_data_t TargetTaskData{.value = 31}; + ompt_data_t TargetData{.value = 127}; + ompt_id_t HostOpId = 8191; + + internal::TargetDataOpEmi TDO{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)&HostOpId, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_disassociate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/1, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/2, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + // Set HostOpId=nullptr + internal::TargetDataOpEmi TDO_HostOpIdNull{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)nullptr, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_disassociate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/1, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/2, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + std::ostringstream StreamTDO1; + std::ostringstream StreamTDO2; + std::string 
CallBackPrefix{" Callback DataOp EMI: endpoint=1 optype=6"}; + std::string CallBackSuffix{ + " src=0x11 src_device_num=1 dest=0x22 dest_device_num=2 " + "bytes=4096 code=0x33"}; + StreamTDO1 << CallBackPrefix << std::showbase << std::hex; + StreamTDO1 << " target_task_data=" << &TargetTaskData << " (0x1f)"; + StreamTDO1 << " target_data=" << &TargetData << " (0x7f)"; + StreamTDO1 << " host_op_id=" << &HostOpId << " (0x1fff)"; + StreamTDO1 << CallBackSuffix; + + StreamTDO2 << CallBackPrefix << std::showbase << std::hex; + StreamTDO2 << " target_task_data=" << &TargetTaskData << " (0x1f)"; + StreamTDO2 << " target_data=" << &TargetData << " (0x7f)"; + StreamTDO2 << " host_op_id=(nil) (0x0)"; + StreamTDO2 << CallBackSuffix; + + EXPECT_EQ(TDO.toString(), StreamTDO1.str()); + EXPECT_EQ(TDO_HostOpIdNull.toString(), StreamTDO2.str()); +} + +TEST(InternalEvent_toString, TargetSubmit) { + internal::TargetSubmit TS{/*TargetId=*/7, + /*HostOpId=*/31, + /*RequestedNumTeams=*/127}; + + EXPECT_EQ(TS.toString(), + " Callback Submit: target_id=7 host_op_id=31 req_num_teams=127"); +} + +TEST(InternalEvent_toString, TargetSubmitEmi) { + ompt_data_t TargetData{.value = 127}; + ompt_id_t HostOpId = 8191; + internal::TargetSubmitEmi TS{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)&HostOpId, + /*RequestedNumTeams=*/7}; + + std::ostringstream StreamTS; + std::string CallBackPrefix{ + " Callback Submit EMI: endpoint=1 req_num_teams=7"}; + StreamTS << CallBackPrefix << std::showbase << std::hex; + StreamTS << " target_data=" << &TargetData << " (0x7f)"; + StreamTS << " host_op_id=" << &HostOpId << " (0x1fff)"; + + EXPECT_EQ(TS.toString(), StreamTS.str()); +} + +TEST(InternalEvent_toString, DeviceInitialize) { + const char *Type = "DeviceType"; + const char *DocStr = "DocumentationString"; + + internal::DeviceInitialize DI{/*DeviceNum=*/7, + /*Type=*/Type, + /*Device=*/(ompt_device_t *)0x11, + 
/*LookupFn=*/(ompt_function_lookup_t)0x22, + /*DocStr=*/DocStr}; + + internal::DeviceInitialize DINull{/*DeviceNum=*/0, + /*Type=*/nullptr, + /*Device=*/nullptr, + /*LookupFn=*/(ompt_function_lookup_t)0x0, + /*DocStr=*/nullptr}; + + std::ostringstream StreamDI; + std::string CallBackPrefix{"Callback Init: device_num=7 type=DeviceType " + "device=0x11 lookup=0x22 doc="}; + StreamDI << CallBackPrefix << std::showbase << std::hex; + StreamDI << (uint64_t)DocStr; + EXPECT_EQ(DI.toString(), StreamDI.str()); + + // TODO This looks inconsistent: (null) vs. (nil) + EXPECT_EQ(DINull.toString(), "Callback Init: device_num=0 type=(null) " + "device=(nil) lookup=(nil) doc=(nil)"); +} + +TEST(InternalEvent_toString, DeviceFinalize) { + internal::DeviceFinalize DF{/*DeviceNum=*/7}; + + EXPECT_EQ(DF.toString(), "Callback Fini: device_num=7"); +} + +TEST(InternalEvent_toString, DeviceLoad) { + const char *Filename = "FilenameToLoad"; + + internal::DeviceLoad DL{/*DeviceNum=*/7, + /*Filename=*/Filename, + /*OffsetInFile=*/31, + /*VmaInFile=*/(void *)0x11, + /*Bytes=*/127, + /*HostAddr=*/(void *)0x22, + /*DeviceAddr=*/(void *)0x33, + /*ModuleId=*/8191}; + + internal::DeviceLoad DLNull{/*DeviceNum=*/0, + /*Filename=*/nullptr, + /*OffsetInFile=*/0, + /*VmaInFile=*/nullptr, + /*Bytes=*/0, + /*HostAddr=*/nullptr, + /*DeviceAddr=*/nullptr, + /*ModuleId=*/0}; + + EXPECT_EQ( + DL.toString(), + "Callback Load: device_num:7 module_id:8191 " + "filename:FilenameToLoad host_adddr:0x22 device_addr:0x33 bytes:127"); + + // TODO This looks inconsistent: (null) vs. 
(nil) and ':' instead of '=' + EXPECT_EQ(DLNull.toString(), + "Callback Load: device_num:0 module_id:0 filename:(null) " + "host_adddr:(nil) device_addr:(nil) bytes:0"); +} + +TEST(InternalEvent_toString, BufferRequest) { + size_t Bytes = 7; + ompt_buffer_t *Buffer = (void *)0x11; + + internal::BufferRequest BR{/*DeviceNum=*/31, + /*Buffer=*/&Buffer, + /*Bytes=*/&Bytes}; + + internal::BufferRequest BRNull{/*DeviceNum=*/127, + /*Buffer=*/nullptr, + /*Bytes=*/nullptr}; + + EXPECT_EQ(BR.toString(), + "Allocated 7 bytes at 0x11 in buffer request callback"); + EXPECT_EQ(BRNull.toString(), + "Allocated 0 bytes at (nil) in buffer request callback"); +} + +TEST(InternalEvent_toString, BufferComplete) { + ompt_buffer_t *Buffer = (void *)0x11; + + internal::BufferComplete BC{/*DeviceNum=*/7, + /*Buffer=*/Buffer, + /*Bytes=*/127, + /*Begin=*/8191, + /*BufferOwned=*/1}; + + internal::BufferComplete BCNull{/*DeviceNum=*/0, + /*Buffer=*/nullptr, + /*Bytes=*/0, + /*Begin=*/0, + /*BufferOwned=*/0}; + + EXPECT_EQ(BC.toString(), + "Executing buffer complete callback: 7 0x11 127 0x1fff 1"); + EXPECT_EQ(BCNull.toString(), + "Executing buffer complete callback: 0 (nil) 0 (nil) 0"); +} + +TEST(InternalEvent_toString, BufferRecordInvalid) { + ompt_record_ompt_t InvalidRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_parallel_begin, + /*time=*/7, + /*thread_id=*/31, + /*target_id=*/127, + /*record=*/{.parallel_begin = {}}}; + + internal::BufferRecord BRNull{/*RecordPtr=*/nullptr}; + internal::BufferRecord BRInvalid{/*RecordPtr=*/&InvalidRecord}; + + std::ostringstream StreamBRInvalid; + StreamBRInvalid << "rec=" << std::showbase << std::hex << &InvalidRecord; + StreamBRInvalid << " type=3 (unsupported record type)"; + + EXPECT_EQ(BRNull.toString(), "rec=(nil) type=0 (unsupported record type)"); + EXPECT_EQ(BRInvalid.toString(), StreamBRInvalid.str()); +} + +TEST(InternalEvent_toString, BufferRecordTarget) { + ompt_record_target_t SubRecordTarget{ + 
/*kind=*/ompt_target_t::ompt_target_update, + /*endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*device_num=*/2, + /*task_id=*/127, + /*target_id=*/31, + /*codeptr_ra=*/(const void *)0x11}; + + ompt_record_ompt_t TargetRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target, + /*time=*/7, + /*thread_id=*/29, + /*target_id=*/31, + /*record*/ {.target = SubRecordTarget}}; + + internal::BufferRecord BR{/*RecordPtr=*/&TargetRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &TargetRecord; + StreamBR << " type=8 (Target task) time=7 thread_id=29 target_id=31 kind=4"; + StreamBR << " endpoint=1 device=2 task_id=127 codeptr=0x11"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordDataOp) { + ompt_record_target_data_op_t SubRecordTargetDataOp{ + /*host_op_id=*/7, + /*optype=*/ompt_target_data_op_t::ompt_target_data_alloc_async, + /*src_addr=*/(void *)0x11, + /*src_device_num=*/1, + /*dest_addr=*/(void *)0x22, + /*dest_device_num=*/2, + /*bytes=*/127, + /*end_time=*/128, + /*codeptr_ra=*/(const void *)0x33, + }; + + ompt_record_ompt_t DataOpRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target_data_op_emi, + /*time=*/8, + /*thread_id=*/3, + /*target_id=*/5, + /*record=*/{.target_data_op = SubRecordTargetDataOp}}; + + internal::BufferRecord BR{/*RecordPtr=*/&DataOpRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &DataOpRecord; + StreamBR << " type=34 (Target data op) time=8 thread_id=3 target_id=5"; + StreamBR << " host_op_id=7 optype=17 src_addr=0x11 src_device=1"; + StreamBR << " dest_addr=0x22 dest_device=2 bytes=127 end_time=128"; + StreamBR << " duration=120 ns codeptr=0x33"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordKernel) { + ompt_record_target_kernel_t SubRecordTargetKernel{ + /*host_op_id=*/11, + /*requested_num_teams=*/127, + /*granted_num_teams=*/63, + 
/*end_time=*/8191, + }; + + ompt_record_ompt_t KernelRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target_submit_emi, + /*time=*/9, + /*thread_id=*/19, + /*target_id=*/33, + /*record=*/{.target_kernel = SubRecordTargetKernel}}; + + internal::BufferRecord BR{/*RecordPtr=*/&KernelRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &KernelRecord; + StreamBR << " type=35 (Target kernel) time=9 thread_id=19 target_id=33"; + StreamBR << " host_op_id=11 requested_num_teams=127 granted_num_teams=63"; + StreamBR << " end_time=8191 duration=8182 ns"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordDeallocation) { + internal::BufferRecordDeallocation BRD{/*Buffer=*/(ompt_record_ompt_t *)0x11}; + internal::BufferRecordDeallocation BRDNull{/*Buffer=*/nullptr}; + + EXPECT_EQ(BRD.toString(), "Deallocated 0x11"); + EXPECT_EQ(BRDNull.toString(), "Deallocated (nil)"); +} diff --git a/openmp/tools/omptest/test/unittests/internal-util-test.cpp b/openmp/tools/omptest/test/unittests/internal-util-test.cpp new file mode 100644 index 0000000000000..6a9868b85c3a3 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/internal-util-test.cpp @@ -0,0 +1,95 @@ +#include "InternalEvent.h" +#include + +#include "gtest/gtest.h" + +using namespace omptest; + +TEST(InternalUtility, ExpectedDefault_Integer) { + // int: -2147483648 (decimal) = 0x80000000 (hexadecimal) + EXPECT_EQ(expectedDefault(int), 0x80000000); + EXPECT_EQ(expectedDefault(int), (0x1 << 31)); + // int64_t: -9223372036854775808 (decimal) = 0x8000000000000000 (hexadecimal) + EXPECT_EQ(expectedDefault(int64_t), 0x8000000000000000); + EXPECT_EQ(expectedDefault(int64_t), (0x1L << 63)); +} + +TEST(InternalUtility, ExpectedDefault_Zero) { + // Expectedly zero + EXPECT_EQ(expectedDefault(size_t), 0); + EXPECT_EQ(expectedDefault(unsigned int), 0); + EXPECT_EQ(expectedDefault(ompt_id_t), 0); + EXPECT_EQ(expectedDefault(ompt_dispatch_t), 0); + 
EXPECT_EQ(expectedDefault(ompt_device_time_t), 0); +} + +TEST(InternalUtility, ExpectedDefault_Nullpointer) { + // Expectedly nullptr + EXPECT_EQ(expectedDefault(const char *), nullptr); + EXPECT_EQ(expectedDefault(const void *), nullptr); + EXPECT_EQ(expectedDefault(int *), nullptr); + EXPECT_EQ(expectedDefault(void *), nullptr); + EXPECT_EQ(expectedDefault(ompt_data_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_device_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_frame_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_function_lookup_t), nullptr); + EXPECT_EQ(expectedDefault(ompt_id_t *), nullptr); +} + +TEST(InternalUtility, MakeHexString_PointerValues) { + // IsPointer should only affect zero value + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/true), "(nil)"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false), "0x0"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true), "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/false), "0xff"); +} + +TEST(InternalUtility, MakeHexString_MinimumBytes) { + // Return a minimum length, based on the (minimum) requested bytes + EXPECT_EQ(util::makeHexString(15, /*IsPointer=*/true, /*MinBytes=*/0), "0xf"); + EXPECT_EQ(util::makeHexString(15, /*IsPointer=*/true, /*MinBytes=*/1), + "0x0f"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/0), + "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/1), + "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/2), + "0x00ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/3), + "0x0000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/4), + "0x000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/5), + "0x00000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/6), + "0x0000000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/7), + "0x000000000000ff"); + 
EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/8), + "0x00000000000000ff"); + + // Default to four bytes, if request exceeds eight byte range + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/9), + "0x000000ff"); + + // Disregard requested minimum byte width, if actual value exceeds it + EXPECT_EQ(util::makeHexString(1024, /*IsPointer=*/true, /*MinBytes=*/1), + "0x400"); +} + +TEST(InternalUtility, MakeHexString_HexBase) { + // Cut off "0x" when requested + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/true, /*MinBytes=*/0, + /*ShowHexBase=*/false), + "(nil)"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false, /*MinBytes=*/0, + /*ShowHexBase=*/false), + "0"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false, /*MinBytes=*/1, + /*ShowHexBase=*/false), + "00"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, + /*MinBytes=*/2, + /*ShowHexBase=*/false), + "00ff"); +} diff --git a/openmp/tools/omptest/test/unittests/main-test.cpp b/openmp/tools/omptest/test/unittests/main-test.cpp new file mode 100644 index 0000000000000..2eba663e49c8e --- /dev/null +++ b/openmp/tools/omptest/test/unittests/main-test.cpp @@ -0,0 +1,141 @@ +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptTester.h" +#include + +#include "gtest/gtest.h" + +using OS = omptest::ObserveState; +using OAE = omptest::OmptAssertEvent; + +TEST(CompareOperatorTests, ThreadBeginIdentity) { + auto TBInitial = + OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_initial); + auto TBWorker = OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_worker); + auto TBOther = OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_other); + auto TBUnknown = + OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_unknown); + + ASSERT_EQ(TBInitial, TBInitial); + ASSERT_EQ(TBWorker, TBWorker); + ASSERT_EQ(TBOther, TBOther); + ASSERT_EQ(TBUnknown, TBUnknown); +} + +TEST(CompareOperatorTests, ThreadEndIdentity) { + auto TE = OAE::ThreadEnd("dflt", 
"", OS::always); + + ASSERT_EQ(TE, TE); +} + +TEST(CompareOperatorTests, ParallelBeginIdentity) { + auto PBNumT = OAE::ParallelBegin("thrdenable", "", OS::always, 3); + + ASSERT_EQ(PBNumT, PBNumT); +} + +TEST(CompareOperatorTests, ParallelEndIdentity) { + auto PEDflt = OAE::ParallelEnd("dflt", "", OS::always); + // TODO: Add cases with parallel data set, task data set, flags + + ASSERT_EQ(PEDflt, PEDflt); +} + +TEST(CompareOperatorTests, WorkIdentity) { + auto WDLoopBgn = + OAE::Work("loopbgn", "", OS::always, ompt_work_loop, ompt_scope_begin); + auto WDLoopEnd = + OAE::Work("loobend", "", OS::always, ompt_work_loop, ompt_scope_end); + + ASSERT_EQ(WDLoopBgn, WDLoopBgn); + ASSERT_EQ(WDLoopEnd, WDLoopEnd); + + auto WDSectionsBgn = OAE::Work("sectionsbgn", "", OS::always, + ompt_work_sections, ompt_scope_begin); + auto WDSectionsEnd = OAE::Work("sectionsend", "", OS::always, + ompt_work_sections, ompt_scope_end); + + // TODO: singleexecutor, single_other, workshare, distribute, taskloop, scope, + // loop_static, loop_dynamic, loop_guided, loop_other + + ASSERT_EQ(WDSectionsBgn, WDSectionsBgn); + ASSERT_EQ(WDSectionsEnd, WDSectionsEnd); +} + +TEST(CompareOperatorTests, DispatchIdentity) { + auto DIDflt = OAE::Dispatch("dflt", "", OS::always); + + ASSERT_EQ(DIDflt, DIDflt); +} + +TEST(CompareOperatorTests, TaskCreateIdentity) { + auto TCDflt = OAE::TaskCreate("dflt", "", OS::always); + + ASSERT_EQ(TCDflt, TCDflt); +} + +TEST(CompareOperatorTests, TaskScheduleIdentity) { + auto TS = OAE::TaskSchedule("dflt", "", OS::always); + + ASSERT_EQ(TS, TS); +} + +TEST(CompareOperatorTests, ImplicitTaskIdentity) { + auto ITDfltBgn = + OAE::ImplicitTask("dfltbgn", "", OS::always, ompt_scope_begin); + auto ITDfltEnd = OAE::ImplicitTask("dfltend", "", OS::always, ompt_scope_end); + + ASSERT_EQ(ITDfltBgn, ITDfltBgn); + ASSERT_EQ(ITDfltEnd, ITDfltEnd); +} + +TEST(CompareOperatorTests, SyncRegionIdentity) { + auto SRDfltBgn = + OAE::SyncRegion("srdfltbgn", "", OS::always, + 
ompt_sync_region_barrier_explicit, ompt_scope_begin); + auto SRDfltEnd = + OAE::SyncRegion("srdfltend", "", OS::always, + ompt_sync_region_barrier_explicit, ompt_scope_end); + + ASSERT_EQ(SRDfltBgn, SRDfltBgn); + ASSERT_EQ(SRDfltEnd, SRDfltEnd); +} + +TEST(CompareOperatorTests, TargetIdentity) { + auto TargetDfltBgn = + OAE::Target("dfltbgn", "", OS::always, ompt_target, ompt_scope_begin); + auto TargetDfltEnd = + OAE::Target("dfltend", "", OS::always, ompt_target, ompt_scope_end); + + ASSERT_EQ(TargetDfltBgn, TargetDfltBgn); + ASSERT_EQ(TargetDfltEnd, TargetDfltEnd); + + auto TargetDevBgn = OAE::Target("tgtdevbgn", "", OS::always, ompt_target, + ompt_scope_begin, 1); + auto TargetDevEnd = + OAE::Target("tgtdevend", "", OS::always, ompt_target, ompt_scope_end, 1); + + ASSERT_EQ(TargetDevBgn, TargetDevBgn); + ASSERT_EQ(TargetDevEnd, TargetDevEnd); +} + +TEST(CompareOperatorTests, BufferRecordIdentity) { + // Default, no time limit or anything + auto BRDflt = + OAE::BufferRecord("dflt", "", OS::always, ompt_callback_target_submit); + + // Minimum time set, no max time + auto BRMinSet = OAE::BufferRecord("minset", "", OS::always, + ompt_callback_target_submit, 10); + + // Minimum time and maximum time set + auto BRMinMaxSet = OAE::BufferRecord("minmaxset", "", OS::always, + ompt_callback_target_submit, {10, 100}); + + ASSERT_EQ(BRDflt, BRDflt); + ASSERT_EQ(BRMinSet, BRMinSet); + ASSERT_EQ(BRMinMaxSet, BRMinMaxSet); +} + +// Add main definition +OMPTEST_TESTSUITE_MAIN() >From daed79530ad6995f883fd6b39179f8433201d201 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Halkenh=C3=A4user?= Date: Tue, 15 Jul 2025 18:08:58 +0200 Subject: [PATCH 2/2] Update openmp/tools/omptest/CMakeLists.txt Co-authored-by: Joachim --- openmp/tools/omptest/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/tools/omptest/CMakeLists.txt b/openmp/tools/omptest/CMakeLists.txt index 19f9f898f4300..c5ea5e4faca9a 100644 --- a/openmp/tools/omptest/CMakeLists.txt 
+++ b/openmp/tools/omptest/CMakeLists.txt @@ -8,7 +8,7 @@ cmake_minimum_required(VERSION 3.22) project(omptest LANGUAGES CXX) option(LIBOMPTEST_BUILD_STANDALONE - "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) + "Build ompTest 'standalone', i.e. w/o GoogleTest." ${OPENMP_STANDALONE_BUILD}) option(LIBOMPTEST_BUILD_UNITTESTS "Build ompTest's unit tests , requires GoogleTest." OFF) From openmp-commits at lists.llvm.org Tue Jul 15 09:16:04 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?B?Um9nZXIgRmVycmVyIEliw6HDsWV6?= via Openmp-commits) Date: Tue, 15 Jul 2025 09:16:04 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <68767ec4.170a0220.34c7f3.4cc6@mx.google.com> rofirrim wrote: I'm a bit uncertain about what we want to do with `NumGeneratedLoopNests` and `NumGeneratedLoops`. I understand that, outside of dependent contexts, this is some sort of synthesised attribute (in the base case from analysing the loop nests / canonical loop sequences) that can be used by an enclosing loop transformation to check it is still valid. I wonder if an alternative approach is using a list of integers, one per loop representing the depth of the canonical loop contained in there. For lack of a better name, let's call this the GeneratedLoopSequence (`gls` in the examples, read the examples bottom-up) ```cpp // after unroll gls = [], because it is not partial and there may not be a loop anymore #pragma omp unroll // after fuse gls = [ 1 ] #pragma omp fuse // from syntax gls = [ 1, 1 ] { for (...) { } for (...) { } } ``` ```cpp // after fuse gls = [ 6, 1 ] #pragma omp fuse looprange(2, 2) // from syntax gls = [ 6, 1, 1 ] { // after tile gls = [ 6 ] #pragma omp tile sizes(x, y, z) // from syntax gls = [ 3 ] for (...) { for (...) { for (...) { } } } // from syntax gls = [ 1 ] for (...) 
{ } // from syntax gls = [ 1 ] for (...) { } } ``` ```cpp // after split gls = [ 1, 1] #pragma omp split counts(a, b) // from syntax, gls = [ 1 ] for (...) { } ``` (For dependent contexts I was thinking of making the GeneratedLoopSequence a `std::optional`, so it is explicitly absent and can be told apart from `[]`) But I wonder if this approach is enough. I was considering the `apply` clause, when we get to implement it. And maybe a list of integers is not enough? ```cpp // after apply(unroll) gls = [] // after split gls = [ 1, 1 ] #pragma omp split counts(a, b) apply(unroll) // from syntax, gls = [ 1 ] for (...) { } ``` ```cpp // after apply(unroll(2)), non-partial unroll the second loop, gls = [1, ?not a loop anymore? ] // after split gls = [ 1, 1 ] #pragma omp split counts(a, b) apply(unroll(2)) // from syntax, gls = [ 1 ] for (...) { } ``` ```cpp // after apply(split(2) counts(c, d)), gls = [1, [1, 1] ] (?) // after split gls = [ 1, 1 ] #pragma omp split counts(a, b) apply(split(2) counts(c, d)) // from syntax, gls = [ 1 ] for (...) { } ``` ```cpp // after apply(split counts(c, d)), gls = [[1, 1], [1, 1]] (???) // after split gls = [ 1, 1 ] #pragma omp split counts(a, b) apply(split counts(c, d)) // from syntax, gls = [ 1 ] for (...) { } ``` Maybe there is no need to recursively represent all the nested transformations? 
Other examples, from OpenMP, seem OK: ```cpp void span_apply(double A[128][128]) { // this is not a loop transformation but this is fine because gls is a singleton // and collapse is 2 ≤ 4 #pragma omp for collapse(2) // from apply(grid: reverse, interchange) (this affects the first two loops) gls = [ 4 ] // from tile gls = [ 4 ] #pragma omp tile sizes(16,16) apply(grid: interchange,reverse) // from syntax gls = [ 2 ] for (int i = 0; i < 128; ++i) for (int j = 0; j < 128; ++j) A[i][j] = A[i][j] + 1; } ``` ```cpp void nested_apply(double A[100]) { // after apply(reverse), gls = [ 2 ] // after apply(intratile: unroll partial(2)), gls = [ 2 ] // after tile: gls = [ 2 ] #pragma omp tile sizes(10) apply(intratile: unroll partial(2) apply(reverse)) // from syntax, gls = [ 1 ] for (int i = 0; i < 100; ++i) A[i] = A[i] + 1; } ``` Thoughts? https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Wed Jul 16 05:03:45 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Wed, 16 Jul 2025 05:03:45 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Simplify GNU strerror_r check for Android (PR #148990) In-Reply-To: Message-ID: <68779521.050a0220.84ed9.d52c@mx.google.com> ================ @@ -708,9 +708,7 @@ static char *sys_error(int err) { int strerror_r( int, char *, size_t ); // XSI version */ -#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || \ - (defined(__BIONIC__) && defined(_GNU_SOURCE) && \ - __ANDROID_API__ >= __ANDROID_API_M__) ---------------- enh-google wrote: openmp is distributed as part of the NDK, so i think it's too early to drop apis 21 and 22. @pirama-arumuga-nainar though, because for all i know we already only build openmp at a higher api level, so it's already broken. actually, i can just check myself... 
``` ~/Downloads/android-ndk-r28b$ ~/toybox/toybox readelf -aW ./toolchains/llvm/prebuilt/linux-x86_64/lib/clang/19/lib/linux/aarch64/libomp.so | grep NT_VERSION Android 0x00000084 NT_VERSION API level 21, NDK r27-beta1 (11883388) ~/Downloads/android-ndk-r28b$ ``` if you wanted to `extern "C"` strerror_r() yourself, the XSI one has always been available, so you could remove bionic from this gnu side altogether (at the cost of either a manual `extern "C"` or not building with `_GNU_SOURCE`). but it's probably easier to just wait? https://github.com/llvm/llvm-project/pull/148990 From openmp-commits at lists.llvm.org Wed Jul 16 10:43:19 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Wed, 16 Jul 2025 10:43:19 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877e4b7.050a0220.31cc34.58d1@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 01/10] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * Implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup. 
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 02/10] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 03/10] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 04/10] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 05/10] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } >From fd69d49e8509161925d03015e5706c36a47b64b2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:17:47 +0200 Subject: [PATCH 06/10] remove unnecessary conditions --- offload/libomptarget/OpenMP/InteropAPI.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 57be23f10d24d..fa6325333c606 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -286,9 +286,6 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { @@ -322,9 +319,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) DP("Warning: nowait flag on interop destroy not supported " @@ -348,11 +342,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - if (!Interop) { - DP("Call to %s with invalid interop\n", __func__); - return omp_irc_empty; - } - Interop->addCompletionCb(cb, data); return omp_irc_success; >From fbca38468cd004afb311d5066abcc3c5ca96392e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:52:30 +0200 Subject: [PATCH 07/10] another corner case when unloading --- offload/libomptarget/PluginManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 2cc1314e7a4f0..f5d913f2b8909 100644 --- 
a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -132,7 +132,8 @@ void PluginManager::initializeAllDevices() { std::atexit([]() { // Interop cleanup should be done before the plugins are deinitialized as // the backend libraries may be already unloaded. - PM->InteropTbl.clear(); + if (PM) + PM->InteropTbl.clear(); }); } >From f5715cdccdbcf60f5ac81d93bff2c08059ef5dd2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 19:50:01 +0200 Subject: [PATCH 08/10] make version 32bits to simplify codegen --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 61cbedf06a9a6..3662d221e4bd0 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -51,7 +51,7 @@ struct interop_flags_t { }; struct interop_ctx_t { - uint16_t version; // version of the interface (current is 0) + uint32_t version; // version of the interface (current is 0) interop_flags_t flags; int gtid; }; >From 82fa72d175aa98a8983cc0365756aaa61a51a9c3 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 23:50:42 +0200 Subject: [PATCH 09/10] Fix sporadic race condition with helper threads on deinit --- offload/include/PluginManager.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 ++++++ offload/libomptarget/OpenMP/API.cpp | 9 ++++++++- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index ea1f3b6406ce7..6c6fdebe76dff 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -191,4 +191,5 @@ void deinitRuntime(); extern PluginManager *PM; extern std::atomic RTLAlive; // Indicates if the RTL has been initialized +extern std::atomic RTLOngoingSyncs; // Counts ongoing external syncs #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git 
a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 134ab7c95ac0b..04bd21ec91a49 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -23,6 +23,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; std::atomic RTLAlive{false}; +std::atomic RTLOngoingSyncs{0}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -56,6 +57,11 @@ void deinitRuntime() { DP("Deinit offload library!\n"); // RTL deinitialization has started RTLAlive = false; + while (RTLOngoingSyncs > 0) { + DP("Waiting for ongoing syncs to finish, count: %d\n", + RTLOngoingSyncs.load()); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index f61f56772504b..bffb92722a057 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -689,9 +689,16 @@ void syncImplicitInterops(int gtid, void *event); // ...) so we can synchronize the necessary objects from the offload side. EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, void *event) { - if (!RTLAlive) return; + RTLOngoingSyncs++; + if (!RTLAlive) { + RTLOngoingSyncs--; + return; + } + syncImplicitInterops(gtid, event); + + RTLOngoingSyncs--; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index fa6325333c606..b174ec487a93a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -289,8 +289,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported - "yet. 
Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. Ignored\n"); nowait = false; } >From 0c29ac6c62d1da2a4b55a9d20489fccc22db4fc8 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 16 Jul 2025 19:43:02 +0200 Subject: [PATCH 10/10] change const to constexpr --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 3662d221e4bd0..2fbd6a2035e47 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -152,7 +152,7 @@ struct InteropTableEntry { ContainerTy Interops; - const int reservedEntriesPerThread = + static constexpr int reservedEntriesPerThread = 20; // reserve some entries to avoid reallocation void add(omp_interop_val_t *obj) { From openmp-commits at lists.llvm.org Wed Jul 16 11:16:35 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:35 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec83.170a0220.281cf2.2647@mx.google.com> ================ @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, ---------------- kevinsala wrote: Since this operation may fail in some backends in the future, it's better to use `Expected`, as some other functions do. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:35 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:35 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec83.170a0220.80819.c922@mx.google.com> ================ @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. + interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. 
+ virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } ---------------- kevinsala wrote: Please implement these functions in `PluginInterface.cpp`. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:35 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:35 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec83.170a0220.a3385.29ca@mx.google.com> ================ @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. + interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); ---------------- kevinsala wrote: Please check for errors and report them in these functions. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.d92ed.2639@mx.google.com> ================ @@ -683,3 +683,22 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); ---------------- kevinsala wrote: Declare it in InteropAPI.h please. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.050a0220.3da530.533a@mx.google.com> ================ @@ -193,119 +194,276 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. 
- std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. + if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. 
Ignored\n"); + nowait = false; + } + + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported " + "yet. 
Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void 
__tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); ---------------- kevinsala wrote: I guess the device can be passed directly to these functions. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.050a0220.29430b.62bf@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; ---------------- kevinsala wrote: I don't think the typedef is 
necessary. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.6783c.b17b@mx.google.com> ================ @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { ---------------- kevinsala wrote: Please use `Error` return type here instead of an integer. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.050a0220.38c4b.9b77@mx.google.com> ================ @@ -193,119 +194,276 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. 
- std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. + if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; ---------------- kevinsala wrote: `Nowait`. Similar to other instances. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.197f8b.d8ff@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t 
release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) + cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = ---------------- kevinsala wrote: Can be ```constexpr```? https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.264abd.22e7@mx.google.com> ================ @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; ---------------- kevinsala wrote: This enum in OpenMP 6.0 is named `omp_interop_fr_t` and the values are `omp_ifr_`. What's the reason behind this renaming? 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.2cea77.1ff4@mx.google.com> ================ @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ ---------------- kevinsala wrote: ```/// Check if ...``` https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.38995b.2350@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer 
specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); ---------------- kevinsala wrote: Is it worth having these 4 functions with and without Device parameter? I'd use directly the ones with device param. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.aac63.1bc5@mx.google.com> ================ @@ -193,119 +194,276 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. 
Ignored\n"); + nowait = false; + } + + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); ---------------- kevinsala wrote: If I understood correctly, each of these functions will try to get the device. Can we just get the device from the interop object and pass it to each of these functions? https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.050a0220.cabbc.55ef@mx.google.com> ================ @@ -193,119 +194,276 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks.
- std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. + if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); ---------------- kevinsala wrote: ``` toString(DeviceOrErr.takeError()).c_str() ``` https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.b733a.cab2@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + 
clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) + cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ ---------------- kevinsala wrote: ```/// Vector interface.``` https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] 
[OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.170a0220.2b0b4d.d34c@mx.google.com> ================ @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. + interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { ---------------- kevinsala wrote: Please add asserts checking the interop pointers at this level. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:16:36 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:16:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ec84.050a0220.2a0154.8a58@mx.google.com> ================ @@ -193,119 +194,276 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. 
- std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. + if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. 
Ignored\n"); + nowait = false; + } + + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported " + "yet. 
Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void 
__tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { ---------------- kevinsala wrote: 
```Gtid, Event```. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:17:18 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Wed, 16 Jul 2025 11:17:18 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877ecae.170a0220.28c1fe.2373@mx.google.com> https://github.com/kevinsala edited https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Wed Jul 16 11:22:36 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Wed, 16 Jul 2025 11:22:36 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6877edec.050a0220.b9a54.7fba@mx.google.com> ================ @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } ---------------- jprotze wrote: Why? This header file contains many trivial function implementations. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:20:45 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:20:45 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dc8d.050a0220.1b247f.a3fe@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 01/14] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * It implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup.
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 02/14] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 03/14] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 04/14] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 05/14] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } >From fd69d49e8509161925d03015e5706c36a47b64b2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:17:47 +0200 Subject: [PATCH 06/14] remove unnecessary conditions --- offload/libomptarget/OpenMP/InteropAPI.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 57be23f10d24d..fa6325333c606 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -286,9 +286,6 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { @@ -322,9 +319,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) DP("Warning: nowait flag on interop destroy not supported " @@ -348,11 +342,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - if (!Interop) { - DP("Call to %s with invalid interop\n", __func__); - return omp_irc_empty; - } - Interop->addCompletionCb(cb, data); return omp_irc_success; >From fbca38468cd004afb311d5066abcc3c5ca96392e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:52:30 +0200 Subject: [PATCH 07/14] another corner case when unloading --- offload/libomptarget/PluginManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 2cc1314e7a4f0..f5d913f2b8909 100644 --- 
a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -132,7 +132,8 @@ void PluginManager::initializeAllDevices() { std::atexit([]() { // Interop cleanup should be done before the plugins are deinitialized as // the backend libraries may be already unloaded. - PM->InteropTbl.clear(); + if (PM) + PM->InteropTbl.clear(); }); } >From f5715cdccdbcf60f5ac81d93bff2c08059ef5dd2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 19:50:01 +0200 Subject: [PATCH 08/14] make version 32bits to simplify codegen --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 61cbedf06a9a6..3662d221e4bd0 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -51,7 +51,7 @@ struct interop_flags_t { }; struct interop_ctx_t { - uint16_t version; // version of the interface (current is 0) + uint32_t version; // version of the interface (current is 0) interop_flags_t flags; int gtid; }; >From 82fa72d175aa98a8983cc0365756aaa61a51a9c3 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 23:50:42 +0200 Subject: [PATCH 09/14] Fix sporadic race condition with helper threads on deinit --- offload/include/PluginManager.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 ++++++ offload/libomptarget/OpenMP/API.cpp | 9 ++++++++- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index ea1f3b6406ce7..6c6fdebe76dff 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -191,4 +191,5 @@ void deinitRuntime(); extern PluginManager *PM; extern std::atomic RTLAlive; // Indicates if the RTL has been initialized +extern std::atomic RTLOngoingSyncs; // Counts ongoing external syncs #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git 
a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 134ab7c95ac0b..04bd21ec91a49 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -23,6 +23,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; std::atomic RTLAlive{false}; +std::atomic RTLOngoingSyncs{0}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -56,6 +57,11 @@ void deinitRuntime() { DP("Deinit offload library!\n"); // RTL deinitialization has started RTLAlive = false; + while (RTLOngoingSyncs > 0) { + DP("Waiting for ongoing syncs to finish, count: %d\n", + RTLOngoingSyncs.load()); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index f61f56772504b..bffb92722a057 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -689,9 +689,16 @@ void syncImplicitInterops(int gtid, void *event); // ...) so we can synchronize the necessary objects from the offload side. EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, void *event) { - if (!RTLAlive) return; + RTLOngoingSyncs++; + if (!RTLAlive) { + RTLOngoingSyncs--; + return; + } + syncImplicitInterops(gtid, event); + + RTLOngoingSyncs--; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index fa6325333c606..b174ec487a93a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -289,8 +289,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported - "yet. 
Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. Ignored\n"); nowait = false; } >From 0c29ac6c62d1da2a4b55a9d20489fccc22db4fc8 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 16 Jul 2025 19:43:02 +0200 Subject: [PATCH 10/14] change const to constexpr --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 3662d221e4bd0..2fbd6a2035e47 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -152,7 +152,7 @@ struct InteropTableEntry { ContainerTy Interops; - const int reservedEntriesPerThread = + static constexpr int reservedEntriesPerThread = 20; // reserve some entries to avoid reallocation void add(omp_interop_val_t *obj) { >From 0440af027394b21ad89639e61f8925d78a9cd884 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 10:59:56 +0200 Subject: [PATCH 11/14] address review comments --- offload/include/OpenMP/InteropAPI.h | 11 +++++---- offload/libomptarget/OpenMP/API.cpp | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 27 +++++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 2fbd6a2035e47..5b3c230900695 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -28,10 +28,9 @@ struct interop_attrs_t { bool inorder : 1; int reserved : 31; - /* Check if the supported attributes are compatible with the current - attributes. Only if an attribute is supported can the value be true, - otherwise it needs to be false - */ + /// Check if the supported attributes are compatible with the current + /// attributes. 
Only if an attribute is supported can the value be true, + /// otherwise it needs to be false bool checkSupportedOnly(interop_attrs_t supported) const { return supported.inorder || (!supported.inorder && !inorder); } @@ -167,7 +166,7 @@ struct InteropTableEntry { } } - /* vector interface */ + /// vector interface int size() const { return Interops.size(); } iterator begin() { return Interops.begin(); } iterator end() { return Interops.end(); } @@ -179,4 +178,6 @@ struct InteropTblTy void clear(); }; +void syncImplicitInterops(int gtid, void *event); + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index bffb92722a057..b0f0573833713 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -16,6 +16,7 @@ #include "rtl.h" #include "OpenMP/InternalTypes.h" +#include "OpenMP/InteropAPI.h" #include "OpenMP/Mapping.h" #include "OpenMP/OMPT/Interface.h" #include "OpenMP/omp.h" @@ -684,7 +685,6 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } -void syncImplicitInterops(int gtid, void *event); // This routine gets called from the Host RTL at sync points (taskwait, barrier, // ...) so we can synchronize the necessary objects from the offload side. 
EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index b174ec487a93a..51d411e5ab49a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -227,10 +227,9 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) { - [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); DP("Couldn't find device %" PRId64 " while constructing interop object: %s\n", - DeviceNum, ErrStr.c_str()); + DeviceNum, toString(DeviceOrErr.takeError()).c_str()); return omp_interop_none; } auto &Device = *DeviceOrErr; @@ -280,18 +279,18 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, interop_ctx_t *Ctx, dep_pack_t *Deps) { - bool nowait = Ctx->flags.nowait; + bool Nowait = Ctx->flags.nowait; DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, - DPxPTR(Interop), nowait); + DPxPTR(Interop), Nowait); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { - if (nowait) { + if (Nowait) { DP("Warning: nowait flag on interop use with dependences not supported" "yet. 
Ignored\n"); - nowait = false; + Nowait = false; } __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, @@ -300,7 +299,7 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, } if (Interop->async_info && Interop->async_info->Queue) { - if (nowait) + if (Nowait) Interop->asyncBarrier(); else { Interop->flush(); @@ -333,16 +332,16 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, } EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, - ompx_interop_cb_t *cb, - void *data) { + ompx_interop_cb_t *CB, + void *Data) { DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD "and data " DPxMOD "\n", - __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + __func__, DPxPTR(Interop), DPxPTR(CB), DPxPTR(Data)); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - Interop->addCompletionCb(cb, data); + Interop->addCompletionCb(CB, Data); return omp_irc_success; } @@ -433,15 +432,15 @@ int32_t omp_interop_val_t::release() { return release(Device); } -void syncImplicitInterops(int gtid, void *event) { +void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", - gtid, DPxPTR(event)); + Gtid, DPxPTR(Event)); for (auto iop : PM->InteropTbl) { - if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && !iop->isClean()) { iop->flush(); >From 85eb7a7163f1711b90a20cb08dcdd1175682a3e5 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 11:50:42 +0200 Subject: [PATCH 12/14] Add asserts; Bury virtual interfaces --- .../common/include/PluginInterface.h | 117 ++++++++++++------ 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h 
b/offload/plugins-nextgen/common/include/PluginInterface.h index 40a428dbccb06..9e16efd37554b 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -60,6 +60,39 @@ struct GenericKernelTy; struct GenericDeviceTy; struct RecordReplayTy; +namespace Plugin { +/// Create a success error. This is the same as calling Error::success(), but +/// it is recommended to use this one for consistency with Plugin::error() and +/// Plugin::check(). +static inline Error success() { return Error::success(); } + +/// Create an Offload error. +template +static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) { + return error::createOffloadError(Code, ErrFmt, Args...); +} + +inline Error error(error::ErrorCode Code, const char *S) { + return make_error(Code, S); +} + +inline Error error(error::ErrorCode Code, Error &&OtherError, + const char *Context) { + return error::createOffloadError(Code, std::move(OtherError), Context); +} + +/// Check the plugin-specific error code and return an error or success +/// accordingly. In case of an error, create a string error with the error +/// description. The ErrFmt should follow the format: +/// "Error in []: %s" +/// The last format specifier "%s" is mandatory and will be used to place the +/// error code's description. Notice this function should be only called from +/// the plugin-specific code. +/// TODO: Refactor this, must be defined individually by each plugin. +template +static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); +} // namespace Plugin + /// Class that wraps the __tgt_async_info to simply its usage. 
In case the /// object is constructed without a valid __tgt_async_info, the object will use /// an internal one and will synchronize the current thread with the pending @@ -1219,6 +1252,20 @@ struct GenericPluginTy { virtual Expected isELFCompatible(uint32_t DeviceID, StringRef Image) const = 0; + virtual Error flushQueueImpl(omp_interop_val_t *Interop) { + return Plugin::success(); + } + + virtual Error syncBarrierImpl(omp_interop_val_t *Interop) { + return Plugin::error(error::ErrorCode::UNSUPPORTED, + "sync_barrier not supported"); + } + + virtual Error asyncBarrierImpl(omp_interop_val_t *Interop) { + return Plugin::error(error::ErrorCode::UNSUPPORTED, + "async_barrier not supported"); + } + protected: /// Indicate whether a device id is valid. bool isValidDeviceId(int32_t DeviceId) const { @@ -1370,31 +1417,54 @@ struct GenericPluginTy { /// Create OpenMP interop with the given interop context omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, interop_spec_t *InteropSpec) { + assert(InteropSpec && "Interop spec is null"); auto &Device = getDevice(ID); return Device.createInterop(InteropContext, *InteropSpec); } /// Release OpenMP interop object int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + assert(Interop->DeviceId == ID && "Interop does not match device id"); auto &Device = getDevice(ID); return Device.releaseInterop(Interop); } /// Flush the queue associated with the interop object if necessary - virtual int32_t flush_queue(omp_interop_val_t *Interop) { + int32_t flush_queue(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = flushQueueImpl(Interop); + if (Err) { + REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } return OFFLOAD_SUCCESS; } - - /// Queue a synchronous barrier in the queue associated with the interop + /// Perform a host synchronization with 
the queue associated with the interop /// object and wait for it to complete. - virtual int32_t sync_barrier(omp_interop_val_t *Interop) { - return OFFLOAD_FAIL; + int32_t sync_barrier(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = syncBarrierImpl(Interop); + if (Err) { + REPORT("Failure to synchronize interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } /// Queue an asynchronous barrier in the queue associated with the interop /// object and return immediately. - virtual int32_t async_barrier(omp_interop_val_t *Interop) { - return OFFLOAD_FAIL; + int32_t async_barrier(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = asyncBarrierImpl(Interop); + if (Err) { + REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } private: @@ -1429,39 +1499,6 @@ struct GenericPluginTy { RecordReplayTy *RecordReplay; }; -namespace Plugin { -/// Create a success error. This is the same as calling Error::success(), but -/// it is recommended to use this one for consistency with Plugin::error() and -/// Plugin::check(). -static inline Error success() { return Error::success(); } - -/// Create an Offload error. -template -static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) { - return error::createOffloadError(Code, ErrFmt, Args...); -} - -inline Error error(error::ErrorCode Code, const char *S) { - return make_error(Code, S); -} - -inline Error error(error::ErrorCode Code, Error &&OtherError, - const char *Context) { - return error::createOffloadError(Code, std::move(OtherError), Context); -} - -/// Check the plugin-specific error code and return an error or success -/// accordingly. In case of an error, create a string error with the error -/// description. 
The ErrFmt should follow the format: -/// "Error in []: %s" -/// The last format specifier "%s" is mandatory and will be used to place the -/// error code's description. Notice this function should be only called from -/// the plugin-specific code. -/// TODO: Refactor this, must be defined individually by each plugin. -template -static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); -} // namespace Plugin - /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class /// acts as a reference to a device resource, such as a stream, and requires /// some basic functions to be implemented. The derived class should define an >From 5774a89037b0824f88c0a71b7293d1c71bfc77ab Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 11:56:43 +0200 Subject: [PATCH 13/14] Add error handling to create/releaseInterop --- .../common/include/PluginInterface.h | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 9e16efd37554b..dd565e6f6ca27 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -971,13 +971,13 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } - virtual omp_interop_val_t *createInterop(int32_t InteropType, + virtual Expected createInterop(int32_t InteropType, interop_spec_t &InteropSpec) { return nullptr; } - virtual int32_t releaseInterop(omp_interop_val_t *Interop) { - return OFFLOAD_SUCCESS; + virtual Error releaseInterop(omp_interop_val_t *Interop) { + return Plugin::success(); } virtual interop_spec_t selectInteropPreference(int32_t InteropType, @@ -1419,7 +1419,13 @@ struct GenericPluginTy { interop_spec_t *InteropSpec) { assert(InteropSpec && "Interop spec is null"); auto &Device = getDevice(ID); - 
return Device.createInterop(InteropContext, *InteropSpec); + auto InteropOrErr = Device.createInterop(InteropContext, *InteropSpec); + if (!InteropOrErr) { + REPORT("Failure to create interop object for device " DPxMOD ": %s\n", + DPxPTR(InteropSpec), toString(InteropOrErr.takeError()).c_str()); + return nullptr; + } + return *InteropOrErr; } /// Release OpenMP interop object @@ -1427,7 +1433,13 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); assert(Interop->DeviceId == ID && "Interop does not match device id"); auto &Device = getDevice(ID); - return Device.releaseInterop(Interop); + auto Err = Device.releaseInterop(Interop); + if (Err) { + REPORT("Failure to release interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } /// Flush the queue associated with the interop object if necessary >From a1f81c3d92e2bf3208ed8780853634a8e0eaa8ef Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 13:08:56 +0200 Subject: [PATCH 14/14] Remove interfaces with implicity DeviceTy from interop object --- offload/include/OpenMP/InteropAPI.h | 7 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 85 +++++++++++----------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 5b3c230900695..d23e507f48546 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -108,6 +108,8 @@ typedef struct omp_interop_val_t { clearCompletionCbs(); } + llvm::Expected getDevice() const; + bool hasOwner() const { return OwnerGtid != -1; } void setOwner(int gtid) { OwnerGtid = gtid; } @@ -124,11 +126,6 @@ typedef struct omp_interop_val_t { int32_t async_barrier(DeviceTy &Device); int32_t release(DeviceTy &Device); - int32_t flush(); - int32_t syncBarrier(); - int32_t asyncBarrier(); - int32_t release(); - void addCompletionCb(ompx_interop_cb_t *cb, void *data) { 
CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 51d411e5ab49a..69c6469f9512f 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -298,12 +298,20 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, } } + auto DeviceOrErr = Interop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + return OFFLOAD_FAIL; + } + auto &IOPDevice = *DeviceOrErr; + if (Interop->async_info && Interop->async_info->Queue) { if (Nowait) - Interop->asyncBarrier(); + Interop->async_barrier(IOPDevice); else { - Interop->flush(); - Interop->syncBarrier(); + Interop->flush(IOPDevice); + Interop->sync_barrier(IOPDevice); Interop->markClean(); } } @@ -328,7 +336,14 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, } } - return Interop->release(); + auto DeviceOrErr = Interop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + return OFFLOAD_FAIL; + } + + return Interop->release(*DeviceOrErr); } EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, @@ -348,6 +363,10 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, } // extern "C" +llvm::Expected omp_interop_val_t::getDevice() const { + return PM->getDevice(device_id); +} + bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec) { if (interop_type != InteropType) @@ -394,44 +413,12 @@ int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { int32_t omp_interop_val_t::release(DeviceTy &Device) { if (async_info != nullptr && (!hasOwner() || !isClean())) { - flush(); - syncBarrier(); + flush(Device); + sync_barrier(Device); } return 
Device.RTL->release_interop(device_id, this); } -int32_t omp_interop_val_t::flush() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return flush(Device); -} - -int32_t omp_interop_val_t::syncBarrier() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return sync_barrier(Device); -} - -int32_t omp_interop_val_t::asyncBarrier() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return async_barrier(Device); -} - -int32_t omp_interop_val_t::release() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return release(Device); -} - void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; @@ -443,8 +430,16 @@ void syncImplicitInterops(int Gtid, void *Event) { if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && !iop->isClean()) { - iop->flush(); - iop->syncBarrier(); + auto DeviceOrErr = iop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(iop), toString(DeviceOrErr.takeError()).c_str()); + continue; + } + auto &IOPDevice = *DeviceOrErr; + + iop->flush(IOPDevice); + iop->sync_barrier(IOPDevice); iop->markClean(); // TODO: Alternate implementation option @@ -464,5 +459,13 @@ void syncImplicitInterops(int Gtid, void *Event) { void InteropTblTy::clear() { DP("Clearing Interop Table\n"); - PerThreadTable::clear([](auto &IOP) { IOP->release(); }); + PerThreadTable::clear([](auto &IOP) { + auto DeviceOrErr = IOP->getDevice(); + if (!DeviceOrErr) 
{ + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(IOP), toString(DeviceOrErr.takeError()).c_str()); + return; + } + IOP->release(*DeviceOrErr); + }); } From openmp-commits at lists.llvm.org Thu Jul 17 04:21:15 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:15 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcab.a70a0220.31a317.89ea@mx.google.com> https://github.com/adurang commented: I made most of the changes. I put some extra comments in some of your observations. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:15 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:15 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcab.630a0220.1f27ee.902d@mx.google.com> ================ @@ -193,119 +194,276 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. 
- std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. + if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. 
Ignored\n"); + nowait = false; + } + + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); ---------------- adurang wrote: done https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:16 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:16 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcac.170a0220.38f45d.327d@mx.google.com> ================ @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; ---------------- adurang wrote: There is no omp_ifr_rt. This omp.h is not the omp.h defined by the spec (maybe it should be called omp-defs.h). I don't think these should be labeled interop as it's not an exclusive thing to interop to be able to describe the backends if need be (from an OpenMP program interop is the only way to talk about them, but not in the implementation). Maybe a less confusing alternative would be to drop the omp_ prefix and use some other one (e.g., tgt_). Would that be ok with you? 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:16 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:16 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcac.630a0220.1c09f0.86fe@mx.google.com> https://github.com/adurang edited https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:16 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:16 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcac.050a0220.29dcb3.987c@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool 
isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); ---------------- adurang wrote: Ok, I removed them and just added a wrapper to get the DeviceTy from the Id. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:16 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:16 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcac.050a0220.3b8c77.8974@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes ---------------- adurang wrote: You're not wrong here... but the other existing fields are xxx_yyy. AFAIS, lower case kind of fields are more part of the interface (which impl_attrs is). The others are more internal fields. Shall I rename all fields one way or the other? 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:16 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:16 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcac.170a0220.a6617.2fe6@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; ---------------- adurang wrote: No typedef is necessary. Yet, I find it's always better to define typedefs for containers as it makes code much easier to maintain over the long run. Yes, now it's just one line below so it might seem a bit dumb but as code evolves that might no longer be true so unless there's a technical reason I'd rather set it up like this for the future. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:21:18 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:21:18 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878dcae.170a0220.1c5c7a.3014@mx.google.com> ================ @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. + interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. 
+ virtual int32_t async_barrier(omp_interop_val_t *Interop) { ---------------- adurang wrote: Yes, the interop object contains the device information https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 04:28:34 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 04:28:34 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <6878de62.050a0220.2fb0cf.b190@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 01/15] [OFFLOAD][OPENMP] 6.0 compatible interop interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * Implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup. 
--- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. 
Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr ThEntry; + }; + + std::mutex Mtx; + std::list ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique(); + std::lock_guard Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template void clear(F f) { + std::lock_guard Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 02/15] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 03/15] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 04/15] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 05/15] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } >From fd69d49e8509161925d03015e5706c36a47b64b2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:17:47 +0200 Subject: [PATCH 06/15] remove unnecessary conditions --- offload/libomptarget/OpenMP/InteropAPI.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 57be23f10d24d..fa6325333c606 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -286,9 +286,6 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { @@ -322,9 +319,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) DP("Warning: nowait flag on interop destroy not supported " @@ -348,11 +342,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - if (!Interop) { - DP("Call to %s with invalid interop\n", __func__); - return omp_irc_empty; - } - Interop->addCompletionCb(cb, data); return omp_irc_success; >From fbca38468cd004afb311d5066abcc3c5ca96392e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:52:30 +0200 Subject: [PATCH 07/15] another corner case when unloading --- offload/libomptarget/PluginManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 2cc1314e7a4f0..f5d913f2b8909 100644 --- 
a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -132,7 +132,8 @@ void PluginManager::initializeAllDevices() { std::atexit([]() { // Interop cleanup should be done before the plugins are deinitialized as // the backend libraries may be already unloaded. - PM->InteropTbl.clear(); + if (PM) + PM->InteropTbl.clear(); }); } >From f5715cdccdbcf60f5ac81d93bff2c08059ef5dd2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 19:50:01 +0200 Subject: [PATCH 08/15] make version 32bits to simplify codegen --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 61cbedf06a9a6..3662d221e4bd0 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -51,7 +51,7 @@ struct interop_flags_t { }; struct interop_ctx_t { - uint16_t version; // version of the interface (current is 0) + uint32_t version; // version of the interface (current is 0) interop_flags_t flags; int gtid; }; >From 82fa72d175aa98a8983cc0365756aaa61a51a9c3 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 23:50:42 +0200 Subject: [PATCH 09/15] Fix sporadic race condition with helper threads on deinit --- offload/include/PluginManager.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 ++++++ offload/libomptarget/OpenMP/API.cpp | 9 ++++++++- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index ea1f3b6406ce7..6c6fdebe76dff 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -191,4 +191,5 @@ void deinitRuntime(); extern PluginManager *PM; extern std::atomic RTLAlive; // Indicates if the RTL has been initialized +extern std::atomic RTLOngoingSyncs; // Counts ongoing external syncs #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git 
a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 134ab7c95ac0b..04bd21ec91a49 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -23,6 +23,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; std::atomic RTLAlive{false}; +std::atomic RTLOngoingSyncs{0}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -56,6 +57,11 @@ void deinitRuntime() { DP("Deinit offload library!\n"); // RTL deinitialization has started RTLAlive = false; + while (RTLOngoingSyncs > 0) { + DP("Waiting for ongoing syncs to finish, count: %d\n", + RTLOngoingSyncs.load()); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index f61f56772504b..bffb92722a057 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -689,9 +689,16 @@ void syncImplicitInterops(int gtid, void *event); // ...) so we can synchronize the necessary objects from the offload side. EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, void *event) { - if (!RTLAlive) return; + RTLOngoingSyncs++; + if (!RTLAlive) { + RTLOngoingSyncs--; + return; + } + syncImplicitInterops(gtid, event); + + RTLOngoingSyncs--; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index fa6325333c606..b174ec487a93a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -289,8 +289,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported - "yet. 
Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. Ignored\n"); nowait = false; } >From 0c29ac6c62d1da2a4b55a9d20489fccc22db4fc8 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 16 Jul 2025 19:43:02 +0200 Subject: [PATCH 10/15] change const to constexpr --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 3662d221e4bd0..2fbd6a2035e47 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -152,7 +152,7 @@ struct InteropTableEntry { ContainerTy Interops; - const int reservedEntriesPerThread = + static constexpr int reservedEntriesPerThread = 20; // reserve some entries to avoid reallocation void add(omp_interop_val_t *obj) { >From 0440af027394b21ad89639e61f8925d78a9cd884 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 10:59:56 +0200 Subject: [PATCH 11/15] address review comments --- offload/include/OpenMP/InteropAPI.h | 11 +++++---- offload/libomptarget/OpenMP/API.cpp | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 27 +++++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 2fbd6a2035e47..5b3c230900695 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -28,10 +28,9 @@ struct interop_attrs_t { bool inorder : 1; int reserved : 31; - /* Check if the supported attributes are compatible with the current - attributes. Only if an attribute is supported can the value be true, - otherwise it needs to be false - */ + /// Check if the supported attributes are compatible with the current + /// attributes. 
Only if an attribute is supported can the value be true, + /// otherwise it needs to be false bool checkSupportedOnly(interop_attrs_t supported) const { return supported.inorder || (!supported.inorder && !inorder); } @@ -167,7 +166,7 @@ struct InteropTableEntry { } } - /* vector interface */ + /// vector interface int size() const { return Interops.size(); } iterator begin() { return Interops.begin(); } iterator end() { return Interops.end(); } @@ -179,4 +178,6 @@ struct InteropTblTy void clear(); }; +void syncImplicitInterops(int gtid, void *event); + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index bffb92722a057..b0f0573833713 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -16,6 +16,7 @@ #include "rtl.h" #include "OpenMP/InternalTypes.h" +#include "OpenMP/InteropAPI.h" #include "OpenMP/Mapping.h" #include "OpenMP/OMPT/Interface.h" #include "OpenMP/omp.h" @@ -684,7 +685,6 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } -void syncImplicitInterops(int gtid, void *event); // This routine gets called from the Host RTL at sync points (taskwait, barrier, // ...) so we can synchronize the necessary objects from the offload side. 
EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index b174ec487a93a..51d411e5ab49a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -227,10 +227,9 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) { - [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); DP("Couldn't find device %" PRId64 " while constructing interop object: %s\n", - DeviceNum, ErrStr.c_str()); + DeviceNum, toString(DeviceOrErr.takeError()).c_str()); return omp_interop_none; } auto &Device = *DeviceOrErr; @@ -280,18 +279,18 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, interop_ctx_t *Ctx, dep_pack_t *Deps) { - bool nowait = Ctx->flags.nowait; + bool Nowait = Ctx->flags.nowait; DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, - DPxPTR(Interop), nowait); + DPxPTR(Interop), Nowait); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { - if (nowait) { + if (Nowait) { DP("Warning: nowait flag on interop use with dependences not supported" "yet. 
Ignored\n"); - nowait = false; + Nowait = false; } __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, @@ -300,7 +299,7 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, } if (Interop->async_info && Interop->async_info->Queue) { - if (nowait) + if (Nowait) Interop->asyncBarrier(); else { Interop->flush(); @@ -333,16 +332,16 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, } EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, - ompx_interop_cb_t *cb, - void *data) { + ompx_interop_cb_t *CB, + void *Data) { DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD "and data " DPxMOD "\n", - __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + __func__, DPxPTR(Interop), DPxPTR(CB), DPxPTR(Data)); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - Interop->addCompletionCb(cb, data); + Interop->addCompletionCb(CB, Data); return omp_irc_success; } @@ -433,15 +432,15 @@ int32_t omp_interop_val_t::release() { return release(Device); } -void syncImplicitInterops(int gtid, void *event) { +void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", - gtid, DPxPTR(event)); + Gtid, DPxPTR(Event)); for (auto iop : PM->InteropTbl) { - if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && !iop->isClean()) { iop->flush(); >From 85eb7a7163f1711b90a20cb08dcdd1175682a3e5 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 11:50:42 +0200 Subject: [PATCH 12/15] Add asserts; Bury virtual interfaces --- .../common/include/PluginInterface.h | 117 ++++++++++++------ 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h 
b/offload/plugins-nextgen/common/include/PluginInterface.h index 40a428dbccb06..9e16efd37554b 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -60,6 +60,39 @@ struct GenericKernelTy; struct GenericDeviceTy; struct RecordReplayTy; +namespace Plugin { +/// Create a success error. This is the same as calling Error::success(), but +/// it is recommended to use this one for consistency with Plugin::error() and +/// Plugin::check(). +static inline Error success() { return Error::success(); } + +/// Create an Offload error. +template +static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) { + return error::createOffloadError(Code, ErrFmt, Args...); +} + +inline Error error(error::ErrorCode Code, const char *S) { + return make_error(Code, S); +} + +inline Error error(error::ErrorCode Code, Error &&OtherError, + const char *Context) { + return error::createOffloadError(Code, std::move(OtherError), Context); +} + +/// Check the plugin-specific error code and return an error or success +/// accordingly. In case of an error, create a string error with the error +/// description. The ErrFmt should follow the format: +/// "Error in []: %s" +/// The last format specifier "%s" is mandatory and will be used to place the +/// error code's description. Notice this function should be only called from +/// the plugin-specific code. +/// TODO: Refactor this, must be defined individually by each plugin. +template +static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); +} // namespace Plugin + /// Class that wraps the __tgt_async_info to simply its usage. 
In case the /// object is constructed without a valid __tgt_async_info, the object will use /// an internal one and will synchronize the current thread with the pending @@ -1219,6 +1252,20 @@ struct GenericPluginTy { virtual Expected isELFCompatible(uint32_t DeviceID, StringRef Image) const = 0; + virtual Error flushQueueImpl(omp_interop_val_t *Interop) { + return Plugin::success(); + } + + virtual Error syncBarrierImpl(omp_interop_val_t *Interop) { + return Plugin::error(error::ErrorCode::UNSUPPORTED, + "sync_barrier not supported"); + } + + virtual Error asyncBarrierImpl(omp_interop_val_t *Interop) { + return Plugin::error(error::ErrorCode::UNSUPPORTED, + "async_barrier not supported"); + } + protected: /// Indicate whether a device id is valid. bool isValidDeviceId(int32_t DeviceId) const { @@ -1370,31 +1417,54 @@ struct GenericPluginTy { /// Create OpenMP interop with the given interop context omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, interop_spec_t *InteropSpec) { + assert(InteropSpec && "Interop spec is null"); auto &Device = getDevice(ID); return Device.createInterop(InteropContext, *InteropSpec); } /// Release OpenMP interop object int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + assert(Interop->DeviceId == ID && "Interop does not match device id"); auto &Device = getDevice(ID); return Device.releaseInterop(Interop); } /// Flush the queue associated with the interop object if necessary - virtual int32_t flush_queue(omp_interop_val_t *Interop) { + int32_t flush_queue(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = flushQueueImpl(Interop); + if (Err) { + REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } return OFFLOAD_SUCCESS; } - - /// Queue a synchronous barrier in the queue associated with the interop + /// Perform a host synchronization with 
the queue associated with the interop /// object and wait for it to complete. - virtual int32_t sync_barrier(omp_interop_val_t *Interop) { - return OFFLOAD_FAIL; + int32_t sync_barrier(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = syncBarrierImpl(Interop); + if (Err) { + REPORT("Failure to synchronize interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } /// Queue an asynchronous barrier in the queue associated with the interop /// object and return immediately. - virtual int32_t async_barrier(omp_interop_val_t *Interop) { - return OFFLOAD_FAIL; + int32_t async_barrier(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = asyncBarrierImpl(Interop); + if (Err) { + REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } private: @@ -1429,39 +1499,6 @@ struct GenericPluginTy { RecordReplayTy *RecordReplay; }; -namespace Plugin { -/// Create a success error. This is the same as calling Error::success(), but -/// it is recommended to use this one for consistency with Plugin::error() and -/// Plugin::check(). -static inline Error success() { return Error::success(); } - -/// Create an Offload error. -template -static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) { - return error::createOffloadError(Code, ErrFmt, Args...); -} - -inline Error error(error::ErrorCode Code, const char *S) { - return make_error(Code, S); -} - -inline Error error(error::ErrorCode Code, Error &&OtherError, - const char *Context) { - return error::createOffloadError(Code, std::move(OtherError), Context); -} - -/// Check the plugin-specific error code and return an error or success -/// accordingly. In case of an error, create a string error with the error -/// description. 
The ErrFmt should follow the format: -/// "Error in []: %s" -/// The last format specifier "%s" is mandatory and will be used to place the -/// error code's description. Notice this function should be only called from -/// the plugin-specific code. -/// TODO: Refactor this, must be defined individually by each plugin. -template -static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); -} // namespace Plugin - /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class /// acts as a reference to a device resource, such as a stream, and requires /// some basic functions to be implemented. The derived class should define an >From 5774a89037b0824f88c0a71b7293d1c71bfc77ab Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 11:56:43 +0200 Subject: [PATCH 13/15] Add error handling to create/releaseInterop --- .../common/include/PluginInterface.h | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 9e16efd37554b..dd565e6f6ca27 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -971,13 +971,13 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } - virtual omp_interop_val_t *createInterop(int32_t InteropType, + virtual Expected createInterop(int32_t InteropType, interop_spec_t &InteropSpec) { return nullptr; } - virtual int32_t releaseInterop(omp_interop_val_t *Interop) { - return OFFLOAD_SUCCESS; + virtual Error releaseInterop(omp_interop_val_t *Interop) { + return Plugin::success(); } virtual interop_spec_t selectInteropPreference(int32_t InteropType, @@ -1419,7 +1419,13 @@ struct GenericPluginTy { interop_spec_t *InteropSpec) { assert(InteropSpec && "Interop spec is null"); auto &Device = getDevice(ID); - 
return Device.createInterop(InteropContext, *InteropSpec); + auto InteropOrErr = Device.createInterop(InteropContext, *InteropSpec); + if (!InteropOrErr) { + REPORT("Failure to create interop object for device " DPxMOD ": %s\n", + DPxPTR(InteropSpec), toString(InteropOrErr.takeError()).c_str()); + return nullptr; + } + return *InteropOrErr; } /// Release OpenMP interop object @@ -1427,7 +1433,13 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); assert(Interop->DeviceId == ID && "Interop does not match device id"); auto &Device = getDevice(ID); - return Device.releaseInterop(Interop); + auto Err = Device.releaseInterop(Interop); + if (Err) { + REPORT("Failure to release interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } /// Flush the queue associated with the interop object if necessary >From a1f81c3d92e2bf3208ed8780853634a8e0eaa8ef Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 13:08:56 +0200 Subject: [PATCH 14/15] Remove interfaces with implicit DeviceTy from interop object --- offload/include/OpenMP/InteropAPI.h | 7 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 85 +++++++++++----------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 5b3c230900695..d23e507f48546 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -108,6 +108,8 @@ typedef struct omp_interop_val_t { clearCompletionCbs(); } + llvm::Expected<DeviceTy &> getDevice() const; + bool hasOwner() const { return OwnerGtid != -1; } void setOwner(int gtid) { OwnerGtid = gtid; } @@ -124,11 +126,6 @@ typedef struct omp_interop_val_t { int32_t async_barrier(DeviceTy &Device); int32_t release(DeviceTy &Device); - int32_t flush(); - int32_t syncBarrier(); - int32_t asyncBarrier(); - int32_t release(); - void addCompletionCb(ompx_interop_cb_t *cb, void *data) { 
CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 51d411e5ab49a..69c6469f9512f 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -298,12 +298,20 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, } } + auto DeviceOrErr = Interop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + return OFFLOAD_FAIL; + } + auto &IOPDevice = *DeviceOrErr; + if (Interop->async_info && Interop->async_info->Queue) { if (Nowait) - Interop->asyncBarrier(); + Interop->async_barrier(IOPDevice); else { - Interop->flush(); - Interop->syncBarrier(); + Interop->flush(IOPDevice); + Interop->sync_barrier(IOPDevice); Interop->markClean(); } } @@ -328,7 +336,14 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, } } - return Interop->release(); + auto DeviceOrErr = Interop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + return OFFLOAD_FAIL; + } + + return Interop->release(*DeviceOrErr); } EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, @@ -348,6 +363,10 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, } // extern "C" +llvm::Expected omp_interop_val_t::getDevice() const { + return PM->getDevice(device_id); +} + bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec) { if (interop_type != InteropType) @@ -394,44 +413,12 @@ int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { int32_t omp_interop_val_t::release(DeviceTy &Device) { if (async_info != nullptr && (!hasOwner() || !isClean())) { - flush(); - syncBarrier(); + flush(Device); + sync_barrier(Device); } return 
Device.RTL->release_interop(device_id, this); } -int32_t omp_interop_val_t::flush() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return flush(Device); -} - -int32_t omp_interop_val_t::syncBarrier() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return sync_barrier(Device); -} - -int32_t omp_interop_val_t::asyncBarrier() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return async_barrier(Device); -} - -int32_t omp_interop_val_t::release() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return release(Device); -} - void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; @@ -443,8 +430,16 @@ void syncImplicitInterops(int Gtid, void *Event) { if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && !iop->isClean()) { - iop->flush(); - iop->syncBarrier(); + auto DeviceOrErr = iop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(iop), toString(DeviceOrErr.takeError()).c_str()); + continue; + } + auto &IOPDevice = *DeviceOrErr; + + iop->flush(IOPDevice); + iop->sync_barrier(IOPDevice); iop->markClean(); // TODO: Alternate implementation option @@ -464,5 +459,13 @@ void syncImplicitInterops(int Gtid, void *Event) { void InteropTblTy::clear() { DP("Clearing Interop Table\n"); - PerThreadTable::clear([](auto &IOP) { IOP->release(); }); + PerThreadTable::clear([](auto &IOP) { + auto DeviceOrErr = IOP->getDevice(); + if (!DeviceOrErr) 
{ + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(IOP), toString(DeviceOrErr.takeError()).c_str()); + return; + } + IOP->release(*DeviceOrErr); + }); } >From 6395c457b0c93b30f75350b1edc2a7818842f385 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 13:28:17 +0200 Subject: [PATCH 15/15] Adjust format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 18 +++++++++--------- .../common/include/PluginInterface.h | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 69c6469f9512f..912dcaf4cdcd5 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -300,8 +300,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, auto DeviceOrErr = Interop->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(Interop), + toString(DeviceOrErr.takeError()).c_str()); return OFFLOAD_FAIL; } auto &IOPDevice = *DeviceOrErr; @@ -338,8 +338,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, auto DeviceOrErr = Interop->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(Interop), + toString(DeviceOrErr.takeError()).c_str()); return OFFLOAD_FAIL; } @@ -364,7 +364,7 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, } // extern "C" llvm::Expected omp_interop_val_t::getDevice() const { - return PM->getDevice(device_id); + return PM->getDevice(device_id); } bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, @@ -432,8 +432,8 @@ void syncImplicitInterops(int Gtid, void 
*Event) { auto DeviceOrErr = iop->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(iop), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(iop), + toString(DeviceOrErr.takeError()).c_str()); continue; } auto &IOPDevice = *DeviceOrErr; @@ -462,8 +462,8 @@ void InteropTblTy::clear() { PerThreadTable::clear([](auto &IOP) { auto DeviceOrErr = IOP->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(IOP), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(IOP), + toString(DeviceOrErr.takeError()).c_str()); return; } IOP->release(*DeviceOrErr); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index dd565e6f6ca27..d6a3fb95b1df0 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -971,8 +971,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } - virtual Expected createInterop(int32_t InteropType, - interop_spec_t &InteropSpec) { + virtual Expected + createInterop(int32_t InteropType, interop_spec_t &InteropSpec) { return nullptr; } @@ -1447,7 +1447,7 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); auto Err = flushQueueImpl(Interop); if (Err) { - REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", + REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", DPxPTR(Interop), toString(std::move(Err)).c_str()); return OFFLOAD_FAIL; } @@ -1459,7 +1459,7 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); auto Err = syncBarrierImpl(Interop); if (Err) { - REPORT("Failure to synchronize interop object " DPxMOD ": %s\n", + REPORT("Failure to synchronize interop 
object " DPxMOD ": %s\n", DPxPTR(Interop), toString(std::move(Err)).c_str()); return OFFLOAD_FAIL; } @@ -1472,7 +1472,7 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); auto Err = asyncBarrierImpl(Interop); if (Err) { - REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", + REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", DPxPTR(Interop), toString(std::move(Err)).c_str()); return OFFLOAD_FAIL; } From openmp-commits at lists.llvm.org Thu Jul 17 04:41:01 2025 From: openmp-commits at lists.llvm.org (Alexey Bataev via Openmp-commits) Date: Thu, 17 Jul 2025 04:41:01 -0700 (PDT) Subject: [Openmp-commits] [clang] [llvm] [openmp] [clang][OpenMP] New OpenMP 6.0 threadset clause (PR #135807) In-Reply-To: Message-ID: <6878e14d.050a0220.1e6614.83b4@mx.google.com> ================ @@ -1324,6 +1324,86 @@ class OMPDefaultClause : public OMPClause { } }; +/// This represents 'threadset' clause in the '#pragma omp task ...' directive. +/// +/// \code +/// #pragma omp task threadset(omp_pool) +/// \endcode +/// In this example directive '#pragma omp task' has simple 'threadset' +/// clause with kind 'omp_pool'. 
+class OMPThreadsetClause : public OMPClause { ---------------- alexey-bataev wrote: ```suggestion class OMPThreadsetClause final : public OMPClause { ``` https://github.com/llvm/llvm-project/pull/135807 From openmp-commits at lists.llvm.org Thu Jul 17 04:52:39 2025 From: openmp-commits at lists.llvm.org (Alexey Bataev via Openmp-commits) Date: Thu, 17 Jul 2025 04:52:39 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6878e407.170a0220.2f44aa.572c@mx.google.com> https://github.com/alexey-bataev commented: Some previous comments are not addressed yet https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Thu Jul 17 04:52:39 2025 From: openmp-commits at lists.llvm.org (Alexey Bataev via Openmp-commits) Date: Thu, 17 Jul 2025 04:52:39 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6878e407.630a0220.327294.83e4@mx.google.com> https://github.com/alexey-bataev edited https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Thu Jul 17 04:52:39 2025 From: openmp-commits at lists.llvm.org (Alexey Bataev via Openmp-commits) Date: Thu, 17 Jul 2025 04:52:39 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6878e407.170a0220.1d61e9.452c@mx.google.com> ================ @@ -1143,6 +1143,83 @@ class OMPFullClause final : public OMPNoChildClause { static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma 
omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) +/// \endcode +class OMPLoopRangeClause final + : public OMPClause { + friend class OMPClauseReader; + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; + + /// Number of looprange arguments (always 2: first, count) + static constexpr unsigned NumArgs = 2; + Stmt *Args[NumArgs] = {nullptr, nullptr}; + + /// Set looprange 'first' expression + void setFirst(Expr *E) { Args[0] = E; } + + /// Set looprange 'count' expression + void setCount(Expr *E) { Args[1] = E; } + + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + +public: + /// Build a 'looprange' clause AST node. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, Expr* First, Expr* Count); ---------------- alexey-bataev wrote: Formatting? 
https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Thu Jul 17 06:04:54 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 06:04:54 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6878f4f6.050a0220.2ace57.994d@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- 
flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename 
{flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. 
llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional<std::string> getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. std::optional<std::string> getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional<std::string> IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional<std::string> ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional<std::string> ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs 
must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. -get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$<COMPILE_LANGUAGE:Fortran>:-cpp>") +endif () + +add_compile_options( + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformatting of cooked token buffer causes identifier to be split between lines + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $<TARGET_OBJECTS:flang_rt.intrinsics.obj> + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file From openmp-commits at lists.llvm.org Thu Jul 17 06:07:20 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 17 Jul 2025 06:07:20 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6878f588.170a0220.1a48ca.5f38@mx.google.com> github-actions[bot] wrote: :warning: Python code formatter, darker found issues in your code. :warning:
You can test this locally with the following command: ``````````bash darker --check --diff -r HEAD~1...HEAD flang/test/lit.cfg.py ``````````
View the diff from darker here. ``````````diff --- lit.cfg.py 2025-07-17 12:09:57.000000 +0000 +++ lit.cfg.py 2025-07-17 13:06:52.649010 +0000 @@ -126,43 +126,57 @@ # Check for DEFAULT_SYSROOT, because when it is set -isysroot has no effect. if config.default_sysroot: config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") - -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: 
{config.flang_intrinsic_modules_dir}" + ) + +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", command=FindTool("flang"), extra_args=isysroot_flag + extra_intrinsics_search_args, @@ -172,16 +186,15 @@ "%flang_fc1", command=FindTool("flang"), extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", command=FindTool("flang"), extra_args=isysroot_flag, @@ -223,14 +236,14 @@ config.environment["LIBPGMATH"] = True # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") -else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: config.available_features.add("openmp_runtime") ``````````
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Thu Jul 17 06:14:35 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 06:14:35 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6878f73b.630a0220.3b688f.9c68@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/2] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- 
flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename 
{flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. 
llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional<std::string> getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. std::optional<std::string> getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional<std::string> IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional<std::string> ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional<std::string> ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs 
must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. -get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include <float.h> - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/2] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" From openmp-commits at lists.llvm.org Thu Jul 17 06:24:32 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 06:24:32 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6878f990.050a0220.3d472f.b96c@mx.google.com> Meinersbur wrote: > What's the main limitation here? 
If this is just a file dependency it should be identical to how all the OpenMP tests depend on `omp.h` being in the resource directory. `omp.h` is created by `configure_file` at configure time. No dependency other than `runtimes-configure` needed. > IMHO this is trivial if we do a runtimes build, since we can just require that `openmp;flang-rt` are in the same toolchain, With toolchain you mean a bootstrapping build with `LLVM_ENABLE_RUNTIMES=openmp;flang-rt` ? Don't forget the users of a Flang-standalone build (`cmake -S /flang`). > which then gives us well defined access to `openmp`'s CMake targets so long as it's listed before `flang-rt`. `check-flang` (LLVM_ENABLE_PROJECTS=flang) needs access to `libomp.mod` (LLVM_ENABLE_RUNTIMES=openmp) and the flang modules as well to work. https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Thu Jul 17 06:25:22 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 06:25:22 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6878f9c2.170a0220.22c35.3d0e@mx.google.com> ================ @@ -3979,6 +3979,16 @@ def fsyntax_only : Flag<["-"], "fsyntax-only">, Visibility<[ClangOption, CLOption, DXCOption, CC1Option, FC1Option, FlangOption]>, Group, HelpText<"Run the preprocessor, parser and semantic analysis stages">; + + +def fno_builtin_modules : Flag<["-"], "fno-builtin-modules">, + Visibility<[FC1Option]>, + HelpText<"Do not implicitly use builtin modules (for internal use only)">; ---------------- Meinersbur wrote: removed https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Thu Jul 17 07:00:33 2025 From: openmp-commits at lists.llvm.org (Robert Imschweiler via Openmp-commits) Date: Thu, 17 Jul 2025 07:00:33 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OpenMP][clang] 6.0: num_threads 
strict (part 2: device runtime) (PR #146404) In-Reply-To: Message-ID: <68790201.170a0220.311957.6698@mx.google.com> https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/146404 >From c559c2b55b05ccca41dfc150c32a0891626a1dc9 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Fri, 27 Jun 2025 07:54:07 -0500 Subject: [PATCH 1/2] [OpenMP][clang] 6.0: num_threads strict (part 2: device runtime) OpenMP 6.0 12.1.2 specifies the behavior of the strict modifier for the num_threads clause on parallel directives, along with the message and severity clauses. This commit implements necessary device runtime changes. --- offload/DeviceRTL/include/DeviceTypes.h | 6 ++ offload/DeviceRTL/src/Parallelism.cpp | 78 +++++++++++++++++++------ openmp/runtime/src/kmp.h | 1 + 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h index 2e5d92380f040..111143a5578f1 100644 --- a/offload/DeviceRTL/include/DeviceTypes.h +++ b/offload/DeviceRTL/include/DeviceTypes.h @@ -136,6 +136,12 @@ struct omp_lock_t { void *Lock; }; +// see definition in openmp/runtime kmp.h +typedef enum omp_severity_t { + severity_warning = 1, + severity_fatal = 2 +} omp_severity_t; + using InterWarpCopyFnTy = void (*)(void *src, int32_t warp_num); using ShuffleReductFnTy = void (*)(void *rhsData, int16_t lane_id, int16_t lane_offset, int16_t shortCircuit); diff --git a/offload/DeviceRTL/src/Parallelism.cpp b/offload/DeviceRTL/src/Parallelism.cpp index 08ce616aee1c4..78438a60454b8 100644 --- a/offload/DeviceRTL/src/Parallelism.cpp +++ b/offload/DeviceRTL/src/Parallelism.cpp @@ -45,7 +45,24 @@ using namespace ompx; namespace { -uint32_t determineNumberOfThreads(int32_t NumThreadsClause) { +void num_threads_strict_error(int32_t nt_strict, int32_t nt_severity, + const char *nt_message, int32_t requested, + int32_t actual) { + if (nt_message) + printf("%s\n", nt_message); + else + printf("The computed number of 
threads (%u) does not match the requested " + "number of threads (%d). Consider that it might not be supported " + "to select exactly %d threads on this target device.\n", + actual, requested, requested); + if (nt_severity == severity_fatal) + __builtin_trap(); +} + +uint32_t determineNumberOfThreads(int32_t NumThreadsClause, + int32_t nt_strict = false, + int32_t nt_severity = severity_fatal, + const char *nt_message = nullptr) { uint32_t NThreadsICV = NumThreadsClause != -1 ? NumThreadsClause : icv::NThreads; uint32_t NumThreads = mapping::getMaxTeamThreads(); @@ -55,13 +72,17 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) { // SPMD mode allows any number of threads, for generic mode we round down to a // multiple of WARPSIZE since it is legal to do so in OpenMP. - if (mapping::isSPMDMode()) - return NumThreads; + if (!mapping::isSPMDMode()) { + if (NumThreads < mapping::getWarpSize()) + NumThreads = 1; + else + NumThreads = (NumThreads & ~((uint32_t)mapping::getWarpSize() - 1)); + } - if (NumThreads < mapping::getWarpSize()) - NumThreads = 1; - else - NumThreads = (NumThreads & ~((uint32_t)mapping::getWarpSize() - 1)); + if (NumThreadsClause != -1 && nt_strict && + NumThreads != static_cast(NumThreadsClause)) + num_threads_strict_error(nt_strict, nt_severity, nt_message, + NumThreadsClause, NumThreads); return NumThreads; } @@ -82,12 +103,14 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) { extern "C" { -[[clang::always_inline]] void __kmpc_parallel_spmd(IdentTy *ident, - int32_t num_threads, - void *fn, void **args, - const int64_t nargs) { +[[clang::always_inline]] void +__kmpc_parallel_spmd(IdentTy *ident, int32_t num_threads, void *fn, void **args, + const int64_t nargs, int32_t nt_strict = false, + int32_t nt_severity = severity_fatal, + const char *nt_message = nullptr) { uint32_t TId = mapping::getThreadIdInBlock(); - uint32_t NumThreads = determineNumberOfThreads(num_threads); + uint32_t NumThreads = + 
determineNumberOfThreads(num_threads, nt_strict, nt_severity, nt_message); uint32_t PTeamSize = NumThreads == mapping::getMaxTeamThreads() ? 0 : NumThreads; // Avoid the race between the read of the `icv::Level` above and the write @@ -140,10 +163,11 @@ extern "C" { return; } -[[clang::always_inline]] void -__kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, - int32_t num_threads, int proc_bind, void *fn, - void *wrapper_fn, void **args, int64_t nargs) { +[[clang::always_inline]] void __kmpc_parallel_51( + IdentTy *ident, int32_t, int32_t if_expr, int32_t num_threads, + int proc_bind, void *fn, void *wrapper_fn, void **args, int64_t nargs, + int32_t nt_strict = false, int32_t nt_severity = severity_fatal, + const char *nt_message = nullptr) { uint32_t TId = mapping::getThreadIdInBlock(); // Assert the parallelism level is zero if disabled by the user. @@ -156,6 +180,12 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, // 3) nested parallel regions if (OMP_UNLIKELY(!if_expr || state::HasThreadState || (config::mayUseNestedParallelism() && icv::Level))) { + // OpenMP 6.0 12.1.2 requires the num_threads 'strict' modifier to also have + // effect when parallel execution is disabled by a corresponding if clause + // attached to the parallel directive. + if (nt_strict && num_threads > 1) + num_threads_strict_error(nt_strict, nt_severity, nt_message, num_threads, + 1); state::DateEnvironmentRAII DERAII(ident); ++icv::Level; invokeMicrotask(TId, 0, fn, args, nargs); @@ -169,12 +199,14 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, // This was moved to its own routine so it could be called directly // in certain situations to avoid resource consumption of unused // logic in parallel_51. 
- __kmpc_parallel_spmd(ident, num_threads, fn, args, nargs); + __kmpc_parallel_spmd(ident, num_threads, fn, args, nargs, nt_strict, + nt_severity, nt_message); return; } - uint32_t NumThreads = determineNumberOfThreads(num_threads); + uint32_t NumThreads = + determineNumberOfThreads(num_threads, nt_strict, nt_severity, nt_message); uint32_t MaxTeamThreads = mapping::getMaxTeamThreads(); uint32_t PTeamSize = NumThreads == MaxTeamThreads ? 0 : NumThreads; @@ -277,6 +309,16 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, __kmpc_end_sharing_variables(); } +[[clang::always_inline]] void __kmpc_parallel_60( + IdentTy *ident, int32_t id, int32_t if_expr, int32_t num_threads, + int proc_bind, void *fn, void *wrapper_fn, void **args, int64_t nargs, + int32_t nt_strict = false, int32_t nt_severity = severity_fatal, + const char *nt_message = nullptr) { + return __kmpc_parallel_51(ident, id, if_expr, num_threads, proc_bind, fn, + wrapper_fn, args, nargs, nt_strict, nt_severity, + nt_message); +} + [[clang::noinline]] bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) { // Work function and arguments for L1 parallel region. 
*WorkFn = state::ParallelRegionFn; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index f62cabee6ea84..815fe6732aed1 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4669,6 +4669,7 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { } // Support for error directive +// See definition in offload/DeviceRTL DeviceTypes.h typedef enum kmp_severity_t { severity_warning = 1, severity_fatal = 2 >From e7d236043312254601ea8662c8629aa1dce04ce6 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Mon, 30 Jun 2025 15:21:53 -0500 Subject: [PATCH 2/2] implement feedback --- offload/DeviceRTL/src/Parallelism.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/offload/DeviceRTL/src/Parallelism.cpp b/offload/DeviceRTL/src/Parallelism.cpp index 78438a60454b8..aa5e74029ec3e 100644 --- a/offload/DeviceRTL/src/Parallelism.cpp +++ b/offload/DeviceRTL/src/Parallelism.cpp @@ -45,9 +45,9 @@ using namespace ompx; namespace { -void num_threads_strict_error(int32_t nt_strict, int32_t nt_severity, - const char *nt_message, int32_t requested, - int32_t actual) { +void numThreadsStrictError(int32_t nt_strict, int32_t nt_severity, + const char *nt_message, int32_t requested, + int32_t actual) { if (nt_message) printf("%s\n", nt_message); else @@ -81,8 +81,8 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause, if (NumThreadsClause != -1 && nt_strict && NumThreads != static_cast(NumThreadsClause)) - num_threads_strict_error(nt_strict, nt_severity, nt_message, - NumThreadsClause, NumThreads); + numThreadsStrictError(nt_strict, nt_severity, nt_message, NumThreadsClause, + NumThreads); return NumThreads; } @@ -184,8 +184,7 @@ __kmpc_parallel_spmd(IdentTy *ident, int32_t num_threads, void *fn, void **args, // effect when parallel execution is disabled by a corresponding if clause // attached to the parallel directive. 
if (nt_strict && num_threads > 1) - num_threads_strict_error(nt_strict, nt_severity, nt_message, num_threads, - 1); + numThreadsStrictError(nt_strict, nt_severity, nt_message, num_threads, 1); state::DateEnvironmentRAII DERAII(ident); ++icv::Level; invokeMicrotask(TId, 0, fn, args, nargs); From openmp-commits at lists.llvm.org Thu Jul 17 07:01:49 2025 From: openmp-commits at lists.llvm.org (Robert Imschweiler via Openmp-commits) Date: Thu, 17 Jul 2025 07:01:49 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OpenMP][clang] 6.0: num_threads strict (part 2: device runtime) (PR #146404) In-Reply-To: Message-ID: <6879024d.170a0220.db915.3555@mx.google.com> ro-i wrote: rebased on current main so that the third PR (that depends on this one) can be rebased, too, to fix the test failure https://github.com/llvm/llvm-project/pull/146404 From openmp-commits at lists.llvm.org Thu Jul 17 07:48:29 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 07:48:29 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <68790d3d.630a0220.11fdb3.e122@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Thu Jul 17 07:51:09 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 07:51:09 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <68790ddd.050a0220.19a51b.d2ae@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Thu Jul 17 08:06:50 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 08:06:50 -0700 (PDT) Subject: [Openmp-commits] [clang] 
[flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6879118a.170a0220.2ea743.c19c@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/4] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- 
flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 
flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. 
std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading 
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/4] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 3/4] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 4/4] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir From openmp-commits at lists.llvm.org Thu Jul 17 08:11:24 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Thu, 17 Jul 2025 08:11:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) Message-ID: <6879129c.630a0220.2fe78c.1157@mx.google.com> Michael =?utf-8?q?Halkenhäuser?= Message-ID: In-Reply-To: ================ @@ -0,0 +1,116 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + 
+option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." OFF) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMPTARGET_OMPT_SUPPORT}) ---------------- jprotze wrote: My idea was that we should be able to write tests that describe the expected callback sequence. Some callbacks are "optional" and will not be triggered with the CMake option `LIBOMP_OMPT_OPTIONAL=off`. I think it makes sense to allow the test to specify the necessary/optional callbacks (as another assert statement). As a sane default, it probably makes sense to use the mandatory/optional classification from the standard. But if we also want to run some target tests with `OMP_TARGET_OFFLOAD=disabled`, the target callbacks should probably be optional (although I'm actually not sure what `set_callback` reports in this case, or when compiling without `-fopenmp-targets`). https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Thu Jul 17 10:39:28 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 10:39:28 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <68793550.170a0220.115a2a.e126@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/5] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ 
flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename 
{flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. 
std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading 
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/5] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 3/5] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 4/5] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 5/5] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" From openmp-commits at lists.llvm.org Thu Jul 17 12:20:06 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Thu, 17 Jul 2025 12:20:06 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <68794ce6.170a0220.35d9d8.0294@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; ---------------- kevinsala wrote: Okay, I agree https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Thu Jul 17 12:53:55 2025 From: openmp-commits at lists.llvm.org (Alex Duran via Openmp-commits) Date: Thu, 17 Jul 2025 12:53:55 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <687954d3.050a0220.53604.01fe@mx.google.com> https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/143491 >From 8694e6ec1dfa1300641854945c86b15c8d63966e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Tue, 10 Jun 2025 10:39:29 +0200 Subject: [PATCH 01/16] [OFFLOAD][OPENMP] 6.0 compatible interop 
interface The following patch introduces a new interop interface implementation with the following characteristics: * It supports the new 6.0 prefer_type specification * It supports both explicit objects (from interop constructs) and implicit objects (from variant calls). * Implements a per-thread reuse mechanism for implicit objects to reduce overheads. * It provides a plugin interface that allows selecting the supported interop types, and managing all the backend related interop operations (init, sync, ...). * It enables cooperation with the OpenMP runtime to allow progress on OpenMP synchronizations. * It cleans up some vendor/fr_id mismatches from the current query routines. * It supports extension to define interop callbacks for library cleanup. --- offload/include/OpenMP/InteropAPI.h | 149 ++++++- offload/include/OpenMP/omp.h | 51 +-- offload/include/PerThreadTable.h | 109 +++++ offload/include/PluginManager.h | 7 +- offload/include/Shared/APITypes.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 + offload/libomptarget/OpenMP/API.cpp | 12 + offload/libomptarget/OpenMP/InteropAPI.cpp | 371 ++++++++++++------ offload/libomptarget/PluginManager.cpp | 6 + offload/libomptarget/exports | 5 +- .../common/include/PluginInterface.h | 55 +++ openmp/runtime/src/kmp.h | 7 + openmp/runtime/src/kmp_barrier.cpp | 8 + openmp/runtime/src/kmp_runtime.cpp | 15 + openmp/runtime/src/kmp_tasking.cpp | 29 ++ 15 files changed, 688 insertions(+), 143 deletions(-) create mode 100644 offload/include/PerThreadTable.h diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 71c78760a3226..61cbedf06a9a6 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -13,17 +13,70 @@ #include "omp.h" +#include "PerThreadTable.h" #include "omptarget.h" extern "C" { typedef enum kmp_interop_type_t { kmp_interop_type_unknown = -1, - kmp_interop_type_platform, - kmp_interop_type_device, - kmp_interop_type_tasksync, + 
kmp_interop_type_target, + kmp_interop_type_targetsync, } kmp_interop_type_t; +struct interop_attrs_t { + bool inorder : 1; + int reserved : 31; + + /* Check if the supported attributes are compatible with the current + attributes. Only if an attribute is supported can the value be true, + otherwise it needs to be false + */ + bool checkSupportedOnly(interop_attrs_t supported) const { + return supported.inorder || (!supported.inorder && !inorder); + } +}; + +struct interop_spec_t { + int32_t fr_id; + interop_attrs_t attrs; // Common attributes + int64_t impl_attrs; // Implementation specific attributes (recognized by each + // plugin) +}; + +struct interop_flags_t { + bool implicit : 1; // dispatch (true) or interop (false) + bool nowait : 1; // has nowait flag + int reserved : 30; +}; + +struct interop_ctx_t { + uint16_t version; // version of the interface (current is 0) + interop_flags_t flags; + int gtid; +}; + +struct dep_pack_t { + int32_t ndeps; + kmp_depend_info_t *deplist; + int32_t ndeps_noalias; + kmp_depend_info_t *noalias_deplist; +}; + +struct omp_interop_val_t; + +typedef void ompx_interop_cb_t(omp_interop_val_t *interop, void *data); + +struct omp_interop_cb_instance_t { + ompx_interop_cb_t *cb; + void *data; + + omp_interop_cb_instance_t(ompx_interop_cb_t *cb, void *data) + : cb(cb), data(data) {} + + void operator()(omp_interop_val_t *interop) { cb(interop, data); } +}; + /// The interop value type, aka. the interop object. typedef struct omp_interop_val_t { /// Device and interop-type are determined at construction time and fix. 
@@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes + + void *RTLProperty = nullptr; // Plugin dependent information + // For implicitly created Interop objects (e.g., from a dispatch construct) + // who owns the object + int OwnerGtid = -1; + // Marks whether the object was requested since the last time it was synced + bool Clean = true; + + typedef llvm::SmallVector callback_list_t; + + callback_list_t CompletionCbs; + + void reset() { + OwnerGtid = -1; + markClean(); + clearCompletionCbs(); + } + + bool hasOwner() const { return OwnerGtid != -1; } + + void setOwner(int gtid) { OwnerGtid = gtid; } + bool isOwnedBy(int gtid) { return OwnerGtid == gtid; } + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec); + bool isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec, + int64_t DeviceNum, int gtid); + void markClean() { Clean = true; } + void markDirty() { Clean = false; } + bool isClean() const { return Clean; } + + int32_t flush(DeviceTy &Device); + int32_t sync_barrier(DeviceTy &Device); + int32_t async_barrier(DeviceTy &Device); + int32_t release(DeviceTy &Device); + + int32_t flush(); + int32_t syncBarrier(); + int32_t asyncBarrier(); + int32_t release(); + + void addCompletionCb(ompx_interop_cb_t *cb, void *data) { + CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); + } + + int numCompletionCbs() const { return CompletionCbs.size(); } + void clearCompletionCbs() { CompletionCbs.clear(); } + + void runCompletionCbs() { + for (auto &cbInstance : CompletionCbs) 
+ cbInstance(this); + clearCompletionCbs(); + } } omp_interop_val_t; } // extern "C" +struct InteropTableEntry { + using ContainerTy = typename std::vector; + using iterator = typename ContainerTy::iterator; + + ContainerTy Interops; + + const int reservedEntriesPerThread = + 20; // reserve some entries to avoid reallocation + + void add(omp_interop_val_t *obj) { + if (Interops.capacity() == 0) + Interops.reserve(reservedEntriesPerThread); + Interops.push_back(obj); + } + + template void clear(ClearFuncTy f) { + for (auto &Obj : Interops) { + f(Obj); + } + } + + /* vector interface */ + int size() const { return Interops.size(); } + iterator begin() { return Interops.begin(); } + iterator end() { return Interops.end(); } + iterator erase(iterator it) { return Interops.erase(it); } +}; + +struct InteropTblTy + : public PerThreadTable { + void clear(); +}; + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h index b44c6aff1b289..67b3bab9e8599 100644 --- a/offload/include/OpenMP/omp.h +++ b/offload/include/OpenMP/omp.h @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; +/* Foreign runtime values from OpenMP Additional Definitions document v2.1 */ +typedef enum omp_foreign_runtime_id_t { + omp_fr_none = 0, + omp_fr_cuda = 1, + omp_fr_cuda_driver = 2, + omp_fr_opencl = 3, + omp_fr_sycl = 4, + omp_fr_hip = 5, + omp_fr_level_zero = 6, + omp_fr_hsa = 7, + omp_fr_last = 8 +} omp_foreign_runtime_id_t; typedef void *omp_interop_t; @@ -134,19 +137,23 @@ omp_get_interop_type_desc(const omp_interop_t, omp_interop_property_t); extern const char *__KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_t, omp_interop_rc_t); -typedef enum omp_interop_backend_type_t { - // 
reserve 0 - omp_interop_backend_type_cuda_1 = 1, -} omp_interop_backend_type_t; - -typedef enum omp_foreign_runtime_ids { - cuda = 1, - cuda_driver = 2, - opencl = 3, - sycl = 4, - hip = 5, - level_zero = 6, -} omp_foreign_runtime_ids_t; +/* Vendor defined values from OpenMP Additional Definitions document v2.1*/ +typedef enum omp_vendor_id { + omp_vendor_unknown = 0, + omp_vendor_amd = 1, + omp_vendor_arm = 2, + omp_vendor_bsc = 3, + omp_vendor_fujitsu = 4, + omp_vendor_gnu = 5, + omp_vendor_hpe = 6, + omp_vendor_ibm = 7, + omp_vendor_intel = 8, + omp_vendor_llvm = 9, + omp_vendor_nec = 10, + omp_vendor_nvidia = 11, + omp_vendor_ti = 12, + omp_vendor_last = 13 +} omp_vendor_id_t; ///} InteropAPI diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h new file mode 100644 index 0000000000000..1e20b56c734d2 --- /dev/null +++ b/offload/include/PerThreadTable.h @@ -0,0 +1,109 @@ +//===-- PerThreadTable.h -- PerThread Storage Structure ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Table indexed with one entry per thread. +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOAD_PERTHREADTABLE_H +#define OFFLOAD_PERTHREADTABLE_H + +#include +#include +#include + +// Using an STL container (such as std::vector) indexed by thread ID has +// too many race conditions issues so we store each thread entry into a +// thread_local variable. +// T is the container type used to store the objects, e.g., std::vector, +// std::set, etc. by each thread. O is the type of the stored objects e.g., +// omp_interop_val_t *, ... 
+ +template <typename ContainerType, typename ObjectType> struct PerThreadTable { + using iterator = typename ContainerType::iterator; + + struct PerThreadData { + size_t NElements = 0; + std::unique_ptr<ContainerType> ThEntry; + }; + + std::mutex Mtx; + std::list<PerThreadData *> ThreadDataList; + + // define default constructors, disable copy and move constructors + PerThreadTable() = default; + PerThreadTable(const PerThreadTable &) = delete; + PerThreadTable(PerThreadTable &&) = delete; + PerThreadTable &operator=(const PerThreadTable &) = delete; + PerThreadTable &operator=(PerThreadTable &&) = delete; + ~PerThreadTable() { + std::lock_guard<std::mutex> Lock(Mtx); + ThreadDataList.clear(); + } + +private: + PerThreadData &getThreadData() { + static thread_local PerThreadData ThData; + return ThData; + } + +protected: + ContainerType &getThreadEntry() { + auto &ThData = getThreadData(); + if (ThData.ThEntry) + return *ThData.ThEntry; + ThData.ThEntry = std::make_unique<ContainerType>(); + std::lock_guard<std::mutex> Lock(Mtx); + ThreadDataList.push_back(&ThData); + return *ThData.ThEntry; + } + + size_t &getThreadNElements() { + auto &ThData = getThreadData(); + return ThData.NElements; + } + +public: + void add(ObjectType obj) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements++; + Entry.add(obj); + } + + iterator erase(iterator it) { + auto &Entry = getThreadEntry(); + auto &NElements = getThreadNElements(); + NElements--; + return Entry.erase(it); + } + + size_t size() { return getThreadNElements(); } + + // Iterators to traverse objects owned by + // the current thread + iterator begin() { + auto &Entry = getThreadEntry(); + return Entry.begin(); + } + iterator end() { + auto &Entry = getThreadEntry(); + return Entry.end(); + } + + template <class F> void clear(F f) { + std::lock_guard<std::mutex> Lock(Mtx); + for (auto ThData : ThreadDataList) { + ThData->ThEntry->clear(f); + ThData->NElements = 0; + } + ThreadDataList.clear(); + } +}; + +#endif diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 
ec3adadf0819b..ea1f3b6406ce7 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -35,6 +35,8 @@ #include #include +#include "OpenMP/InteropAPI.h" + using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins @@ -88,6 +90,9 @@ struct PluginManager { HostPtrToTableMapTy HostPtrToTableMap; std::mutex TblMapMtx; ///< For HostPtrToTableMap + /// Table of cached implicit interop objects + InteropTblTy InteropTbl; + // Work around for plugins that call dlopen on shared libraries that call // tgt_register_lib during their initialisation. Stash the pointers in a // vector until the plugins are all initialised and then register them. @@ -185,5 +190,5 @@ void initRuntime(); void deinitRuntime(); extern PluginManager *PM; - +extern std::atomic RTLAlive; // Indicates if the RTL has been initialized #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h index 978b53d5d69b9..f376c7dc861f9 100644 --- a/offload/include/Shared/APITypes.h +++ b/offload/include/Shared/APITypes.h @@ -36,6 +36,7 @@ struct __tgt_device_image { struct __tgt_device_info { void *Context = nullptr; void *Device = nullptr; + void *Platform = nullptr; }; /// This struct is a record of all the host code that may be offloaded to a diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 29b573a27d087..134ab7c95ac0b 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -22,6 +22,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; +std::atomic RTLAlive{false}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -41,6 +42,9 @@ void initRuntime() { PM->init(); PM->registerDelayedLibraries(); + + // RTL initialization is complete + RTLAlive = true; } } @@ -50,6 +54,8 @@ void deinitRuntime() { if (RefCount == 1) { 
DP("Deinit offload library!\n"); + // RTL deinitialization has started + RTLAlive = false; PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..f61f56772504b 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -683,3 +683,15 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } + +void syncImplicitInterops(int gtid, void *event); +// This routine gets called from the Host RTL at sync points (taskwait, barrier, +// ...) so we can synchronize the necessary objects from the offload side. +EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, + void *event) { + + if (!RTLAlive) + return; + + syncImplicitInterops(gtid, event); +} diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index bdbc440c64a2c..55e47d87a865d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -10,6 +10,7 @@ #include "OpenMP/InternalTypes.h" #include "OpenMP/omp.h" +#include "OffloadPolicy.h" #include "PluginManager.h" #include "device.h" #include "omptarget.h" @@ -56,22 +57,22 @@ void getTypeMismatch(omp_interop_property_t Property, int *Err) { *Err = getPropertyErrorType(Property); } -const char *getVendorIdToStr(const omp_foreign_runtime_ids_t VendorId) { - switch (VendorId) { - case cuda: - return ("cuda"); - case cuda_driver: - return ("cuda_driver"); - case opencl: - return ("opencl"); - case sycl: - return ("sycl"); - case hip: - return ("hip"); - case level_zero: - return ("level_zero"); - } - return ("unknown"); +static const char *VendorStrTbl[] = { + "unknown", "amd", "arm", "bsc", "fujitsu", "gnu", "hpe", + "ibm", "intel", "llvm", "nec", "nvidia", "ti"}; +const char *getVendorIdToStr(const omp_vendor_id_t VendorId) { + if (VendorId < omp_vendor_unknown || VendorId >= omp_vendor_last) 
+ return ("unknown"); + return VendorStrTbl[VendorId]; +} + +static const char *ForeignRuntimeStrTbl[] = { + "none", "cuda", "cuda_driver", "opencl", + "sycl", "hip", "level_zero", "hsa"}; +const char *getForeignRuntimeIdToStr(const omp_foreign_runtime_id_t FrId) { + if (FrId < omp_fr_none || FrId >= omp_fr_last) + return ("unknown"); + return ForeignRuntimeStrTbl[FrId]; } template @@ -83,7 +84,7 @@ intptr_t getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { case omp_ipr_fr_id: - return InteropVal.backend_type_id; + return InteropVal.fr_id; case omp_ipr_vendor: return InteropVal.vendor_id; case omp_ipr_device_num: @@ -99,10 +100,8 @@ const char *getProperty(omp_interop_val_t &InteropVal, omp_interop_property_t Property, int *Err) { switch (Property) { - case omp_ipr_fr_id: - return InteropVal.interop_type == kmp_interop_type_tasksync - ? "tasksync" - : "device+context"; + case omp_ipr_fr_name: + return getForeignRuntimeIdToStr(InteropVal.fr_id); case omp_ipr_vendor_name: return getVendorIdToStr(InteropVal.vendor_id); default: @@ -120,6 +119,8 @@ void *getProperty(omp_interop_val_t &InteropVal, return InteropVal.device_info.Device; *Err = omp_irc_no_value; return const_cast(InteropVal.err_str); + case omp_ipr_platform: + return InteropVal.device_info.Platform; case omp_ipr_device_context: return InteropVal.device_info.Context; case omp_ipr_targetsync: @@ -145,13 +146,13 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr, return false; } if (Property == omp_ipr_targetsync && - (*InteropPtr)->interop_type != kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type != kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; } if ((Property == omp_ipr_device || Property == omp_ipr_device_context) && - (*InteropPtr)->interop_type == kmp_interop_type_tasksync) { + (*InteropPtr)->interop_type == kmp_interop_type_targetsync) { if (Err) *Err = omp_irc_other; return false; @@ -166,7 +167,7 @@ 
bool getPropertyCheck(omp_interop_val_t **InteropPtr, omp_interop_property_t property_id, \ int *err) { \ omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \ - assert((interop_val)->interop_type == kmp_interop_type_tasksync); \ + assert((interop_val)->interop_type == kmp_interop_type_targetsync); \ if (!getPropertyCheck(&interop_val, property_id, err)) { \ return (RETURN_TYPE)(0); \ } \ @@ -193,119 +194,263 @@ __OMP_GET_INTEROP_TY3(const char *, type_desc) __OMP_GET_INTEROP_TY3(const char *, rc_desc) #undef __OMP_GET_INTEROP_TY3 -static const char *copyErrorString(llvm::Error &&Err) { - // TODO: Use the error string while avoiding leaks. - std::string ErrMsg = llvm::toString(std::move(Err)); - char *UsrMsg = reinterpret_cast(malloc(ErrMsg.size() + 1)); - strcpy(UsrMsg, ErrMsg.c_str()); - return UsrMsg; -} - extern "C" { -void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, - kmp_interop_type_t InteropType, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropType != kmp_interop_type_unknown && - "Cannot initialize with unknown interop_type!"); - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, + int64_t DeviceNum, int32_t NumPrefers, + interop_spec_t *Prefers, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + + DP("Call to %s with device_num %" PRId64 ", interop type %" PRId32 + ", number of preferred specs %" PRId32 "%s%s\n", + __func__, DeviceNum, InteropType, NumPrefers, + Ctx->flags.implicit ? " (implicit)" : "", + Ctx->flags.nowait ? " (nowait)" : ""); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) + return omp_interop_none; + + // Now, try to create an interop with device_num. 
+ if (DeviceNum == OFFLOAD_DEVICE_DEFAULT) + DeviceNum = omp_get_default_device(); + + auto gtid = Ctx->gtid; + + if (InteropType == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop creation not supported yet. " + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) { + [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + DP("Couldn't find device %" PRId64 + " while constructing interop object: %s\n", + DeviceNum, ErrStr.c_str()); + return omp_interop_none; + } + auto &Device = *DeviceOrErr; + omp_interop_val_t *Interop = omp_interop_none; + auto InteropSpec = Device.RTL->select_interop_preference( + DeviceNum, InteropType, NumPrefers, Prefers); + if (InteropSpec.fr_id == omp_fr_none) { + DP("Interop request not supported by device %" PRId64 "\n", DeviceNum); + return omp_interop_none; + } + DP("Selected interop preference is fr_id=%s%s impl_attrs=%" PRId64 "\n", + getForeignRuntimeIdToStr((omp_foreign_runtime_id_t)InteropSpec.fr_id), + InteropSpec.attrs.inorder ? " inorder" : "", InteropSpec.impl_attrs); + + if (Ctx->flags.implicit) { + // This is a request for an RTL managed interop object. 
+ // Get it from the InteropTbl if possible + if (PM->InteropTbl.size() > 0) { + for (auto iop : PM->InteropTbl) { + if (iop->isCompatibleWith(InteropType, InteropSpec, DeviceNum, gtid)) { + Interop = iop; + Interop->markDirty(); + DP("Reused interop " DPxMOD " from device number %" PRId64 + " for gtid %" PRId32 "\n", + DPxPTR(Interop), DeviceNum, gtid); + return Interop; + } + } + } } - InteropPtr = new omp_interop_val_t(DeviceId, InteropType); - - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + Interop = Device.RTL->create_interop(DeviceNum, InteropType, &InteropSpec); + DP("Created an interop " DPxMOD " from device number %" PRId64 "\n", + DPxPTR(Interop), DeviceNum); + + if (Ctx->flags.implicit) { + // register the new implicit interop in the RTL + Interop->setOwner(gtid); + Interop->markDirty(); + PM->InteropTbl.add(Interop); + } else { + Interop->setOwner(-1); } - DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || - Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, - &(InteropPtr)->err_str)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + return Interop; +} + +int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + bool nowait = Ctx->flags.nowait; + DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, + DPxPTR(Interop), nowait); + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop use not supported yet. 
" + "Ignored\n"); + if (Deps) + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); } - if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || - Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { - delete InteropPtr; - InteropPtr = omp_interop_none; + + if (Interop->async_info && Interop->async_info->Queue) { + if (nowait) + Interop->asyncBarrier(); + else { + Interop->flush(); + Interop->syncBarrier(); + Interop->markClean(); } } + + return OFFLOAD_SUCCESS; } -void __tgt_interop_use(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); - } - assert(InteropVal != omp_interop_none && - "Cannot use uninitialized interop_ptr!"); - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); +int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, + interop_ctx_t *Ctx, dep_pack_t *Deps) { + DP("Call to %s with interop " DPxMOD "\n", __func__, DPxPTR(Interop)); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); - return; + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return OFFLOAD_FAIL; + + if (!Interop) + return OFFLOAD_FAIL; + + if (Interop->interop_type == kmp_interop_type_targetsync) { + if (Ctx->flags.nowait) + DP("Warning: nowait flag on interop destroy not supported yet. 
" + "Ignored\n"); + if (Deps) { + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, + Deps->ndeps_noalias, Deps->noalias_deplist); + } } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + return Interop->release(); +} + +} // extern "C" + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec) { + if (interop_type != InteropType) + return false; + if (Spec.fr_id != fr_id) + return false; + if (Spec.attrs.inorder != attrs.inorder) + return false; + if (Spec.impl_attrs != impl_attrs) + return false; + + return true; +} + +bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, + const interop_spec_t &Spec, + int64_t DeviceNum, int GTID) { + if (device_id != DeviceNum) + return false; + + if (GTID != OwnerGtid) + return false; + + return isCompatibleWith(InteropType, Spec); +} + +int32_t omp_interop_val_t::flush(DeviceTy &Device) { + return Device.RTL->flush_queue(this); +} + +int32_t omp_interop_val_t::sync_barrier(DeviceTy &Device) { + if (Device.RTL->sync_barrier(this) != OFFLOAD_SUCCESS) { + FATAL_MESSAGE(device_id, "Interop sync barrier failed for %p object\n", + this); } - // TODO Flush the queue associated with the interop through the plugin + DP("Calling completion callbacks for " DPxMOD "\n", DPxPTR(this)); + runCompletionCbs(); + return OFFLOAD_SUCCESS; +} + +int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { + return Device.RTL->async_barrier(this); } -void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid, - omp_interop_val_t *&InteropPtr, int32_t DeviceId, - int32_t Ndeps, kmp_depend_info_t *DepList, - int32_t HaveNowait) { - int32_t NdepsNoalias = 0; - kmp_depend_info_t *NoaliasDepList = NULL; - assert(InteropPtr && "Cannot use nullptr!"); - omp_interop_val_t *InteropVal = InteropPtr; - if (DeviceId == -1) { - DeviceId = omp_get_default_device(); +int32_t 
omp_interop_val_t::release(DeviceTy &Device) { + if (async_info != nullptr && (!hasOwner() || !isClean())) { + flush(); + syncBarrier(); } + return Device.RTL->release_interop(device_id, this); +} - if (InteropVal == omp_interop_none) - return; +int32_t omp_interop_val_t::flush() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return flush(Device); +} - assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && - "Inconsistent device-id usage!"); - auto DeviceOrErr = PM->getDevice(DeviceId); - if (!DeviceOrErr) { - InteropPtr->err_str = copyErrorString(DeviceOrErr.takeError()); +int32_t omp_interop_val_t::syncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return sync_barrier(Device); +} + +int32_t omp_interop_val_t::asyncBarrier() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return async_barrier(Device); +} + +int32_t omp_interop_val_t::release() { + auto DeviceOrErr = PM->getDevice(device_id); + if (!DeviceOrErr) + FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); + DeviceTy &Device = *DeviceOrErr; + return release(Device); +} + +void syncImplicitInterops(int gtid, void *event) { + if (PM->InteropTbl.size() == 0) return; - } - if (InteropVal->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, - NoaliasDepList); + DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", + gtid, DPxPTR(event)); + + for (auto iop : PM->InteropTbl) { + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + !iop->isClean()) { + + iop->flush(); + 
iop->syncBarrier(); + iop->markClean(); + + // TODO: Alternate implementation option + // Instead of using a synchronous barrier, queue an asynchronous + // barrier and create a proxy task associated to the event to handle + // OpenMP synchronizations. + // When the event is completed, fulfill the proxy task to notify the + // OpenMP runtime. + // event = iop->asyncBarrier(); + // ptask = createProxyTask(); + // Events->add(event,ptask); + } } - // TODO Flush the queue associated with the interop through the plugin - // TODO Signal out dependences - - delete InteropPtr; - InteropPtr = omp_interop_none; + // This would be needed for the alternate implementation + // processEvents(); } -} // extern "C" +void InteropTblTy::clear() { + DP("Clearing Interop Table\n"); + PerThreadTable::clear([](auto &IOP) { IOP->release(); }); +} diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 93589960a426d..2cc1314e7a4f0 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -128,6 +128,12 @@ void PluginManager::initializeAllDevices() { initializeDevice(Plugin, DeviceId); } } + // After all plugins are initialized, register atExit cleanup handlers + std::atexit([]() { + // Interop cleanup should be done before the plugins are deinitialized as + // the backend libraries may be already unloaded. 
+ PM->InteropTbl.clear(); + }); } // Returns a pointer to the binary descriptor, upgrading from a legacy format if diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..b40d9b22a1be9 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -67,9 +67,10 @@ VERS1.0 { omp_get_interop_int; omp_get_interop_name; omp_get_interop_type_desc; - __tgt_interop_init; + __tgt_interop_get; __tgt_interop_use; - __tgt_interop_destroy; + __tgt_interop_release; + __tgt_target_sync; __llvmPushCallConfiguration; __llvmPopCallConfiguration; llvmLaunchKernel; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index d2437908a0a6f..40a428dbccb06 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -20,6 +20,7 @@ #include #include "ExclusiveAccess.h" +#include "OpenMP/InteropAPI.h" #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" @@ -937,6 +938,21 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + virtual omp_interop_val_t *createInterop(int32_t InteropType, + interop_spec_t &InteropSpec) { + return nullptr; + } + + virtual int32_t releaseInterop(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + virtual interop_spec_t selectInteropPreference(int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + return interop_spec_t{omp_fr_none, {false, 0}, 0}; + } + /// Allocate and construct a kernel object. virtual Expected constructKernel(const char *Name) = 0; @@ -1342,6 +1358,45 @@ struct GenericPluginTy { int32_t get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr); + /// Return the interop specification that the plugin supports + /// It might not be one of the user specified ones. 
+ interop_spec_t select_interop_preference(int32_t ID, int32_t InteropType, + int32_t NumPrefers, + interop_spec_t *Prefers) { + auto &Device = getDevice(ID); + return Device.selectInteropPreference(InteropType, NumPrefers, Prefers); + } + + /// Create OpenMP interop with the given interop context + omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, + interop_spec_t *InteropSpec) { + auto &Device = getDevice(ID); + return Device.createInterop(InteropContext, *InteropSpec); + } + + /// Release OpenMP interop object + int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + auto &Device = getDevice(ID); + return Device.releaseInterop(Interop); + } + + /// Flush the queue associated with the interop object if necessary + virtual int32_t flush_queue(omp_interop_val_t *Interop) { + return OFFLOAD_SUCCESS; + } + + /// Queue a synchronous barrier in the queue associated with the interop + /// object and wait for it to complete. + virtual int32_t sync_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + + /// Queue an asynchronous barrier in the queue associated with the interop + /// object and return immediately. + virtual int32_t async_barrier(omp_interop_val_t *Interop) { + return OFFLOAD_FAIL; + } + private: /// Indicates if the platform runtime has been fully initialized. bool Initialized = false; diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index a2cacc8792b15..9c4939b029861 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4665,6 +4665,13 @@ static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) { return adjusted_gtid; } +#if ENABLE_LIBOMPTARGET +// Pointers to callbacks registered by the offload library to be notified of +// task progress. 
+extern void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, + void *current_task, void *event); +#endif // ENABLE_LIBOMPTARGET + // Support for error directive typedef enum kmp_severity_t { severity_warning = 1, diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..c6908c35fc3d9 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -1828,6 +1828,14 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // proxy tasks if necessary + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)( + NULL, gtid, KMP_TASKDATA_TO_TASK(this_thr->th.th_current_task), NULL); +#endif + if (!team->t.t_serialized) { #if USE_ITT_BUILD // This value will be used in itt notify events below. diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 417eceb8ebecc..d99d1a410b5d3 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -93,6 +93,9 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only = 0); #endif static void __kmp_do_serial_initialize(void); +#if ENABLE_LIBOMPTARGET +static void __kmp_target_init(void); +#endif // ENABLE_LIBOMPTARGET void __kmp_fork_barrier(int gtid, int tid); void __kmp_join_barrier(int gtid); void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, @@ -7173,6 +7176,9 @@ static void __kmp_do_serial_initialize(void) { #if KMP_MIC_SUPPORTED __kmp_check_mic_type(); #endif +#if ENABLE_LIBOMPTARGET + __kmp_target_init(); +#endif /* ENABLE_LIBOMPTARGET */ // Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG @@ -9386,6 +9392,15 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +#if ENABLE_LIBOMPTARGET +void (*kmp_target_sync_cb)(ident_t 
*loc_ref, int gtid, void *current_task, + void *event) = NULL; +void __kmp_target_init() { + // Look for hooks in the libomptarget library + *(void **)(&kmp_target_sync_cb) = KMP_DLSYM("__tgt_target_sync"); +} +#endif // ENABLE_LIBOMPTARGET + // Empty symbols to export (see exports_so.txt) when feature is disabled extern "C" { #if !KMP_STATS_ENABLED diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 3d85a29423540..d45e3d690510e 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1378,6 +1378,13 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, // thread: thread data structure corresponding to implicit task void __kmp_finish_implicit_task(kmp_info_t *thread) { kmp_taskdata_t *task = thread->th.th_current_task; +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to synchronize any unfinished + // target async regions before finishing the implicit task + if (UNLIKELY(kmp_target_sync_cb != NULL)) + (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid, + KMP_TASKDATA_TO_TASK(task), NULL); +#endif // ENABLE_LIBOMPTARGET if (task->td_dephash) { int children; task->td_flags.complete = 1; @@ -2249,6 +2256,14 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, } #endif // OMPT_SUPPORT && OMPT_OPTIONAL +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata), + NULL); +#endif // ENABLE_LIBOMPTARGET + // Debugger: The taskwait is active. Store location and thread encountered the // taskwait. 
#if USE_ITT_BUILD @@ -2948,6 +2963,13 @@ void __kmpc_end_taskgroup(ident_t *loc, int gtid) { } #endif +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress and create + // any necessary proxy tasks + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL); +#endif // ENABLE_LIBOMPTARGET + if (!taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && (thread->th.th_task_team->tt.tt_found_proxy_tasks || @@ -3391,6 +3413,13 @@ static inline int __kmp_execute_tasks_template( while (1) { // Outer loop keeps trying to find tasks in case of single thread // getting tasks from target constructs while (1) { // Inner loop to find a task and execute it +#if ENABLE_LIBOMPTARGET + // Give an opportunity to the offload runtime to make progress + if (UNLIKELY(kmp_target_sync_cb)) + (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task), + NULL); +#endif // ENABLE_LIBOMPTARGET + task = NULL; if (task_team->tt.tt_num_task_pri) { // get priority task first task = __kmp_get_priority_task(gtid, task_team, is_constrained); >From 81c7402e96fc54b90cf9a1eb46f9f6c2d22ccd13 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 13:55:30 +0200 Subject: [PATCH 02/16] Add missed ext API and minor fix --- offload/libomptarget/OpenMP/InteropAPI.cpp | 34 +++++++++++++++++++--- offload/libomptarget/exports | 1 + 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 55e47d87a865d..14b1f85802464 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -290,12 +290,16 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { - if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop use not supported yet. 
" - "Ignored\n"); - if (Deps) + if (Deps) { + if (nowait) { + DP("Warning: nowait flag on interop use with dependences not supported yet. " + "Ignored\n"); + nowait = false; + } + __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); + } } if (Interop->async_info && Interop->async_info->Queue) { @@ -334,6 +338,28 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } + +EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, + ompx_interop_cb_t *cb, + void *data) { + DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD + "and data " DPxMOD "\n", + __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + + if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) + return omp_irc_other; + + if (!Interop) { + DP("Call to %s with invalid interop\n", __func__); + return omp_irc_empty; + } + + Interop->addCompletionCb(cb, data); + + return omp_irc_success; +} + + } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index b40d9b22a1be9..8e2db6ba8bba4 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -36,6 +36,7 @@ VERS1.0 { __kmpc_push_target_tripcount; __kmpc_push_target_tripcount_mapper; ompx_dump_mapping_tables; + ompx_interop_add_completion_callback; omp_get_mapped_ptr; omp_get_num_devices; omp_get_device_num; >From 0a2825532e274fde73c3633ea471e9c9f3050f48 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 2 Jul 2025 14:13:38 +0200 Subject: [PATCH 03/16] Fix format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 14b1f85802464..c6413431b3e13 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ 
b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -327,8 +327,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) - DP("Warning: nowait flag on interop destroy not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop destroy not supported " + "yet. Ignored\n"); if (Deps) { __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, Deps->ndeps_noalias, Deps->noalias_deplist); @@ -338,7 +338,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, return Interop->release(); } - EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, ompx_interop_cb_t *cb, void *data) { @@ -359,7 +358,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, return omp_irc_success; } - } // extern "C" bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, >From b1a9e1071f7979ef56f03334eb596eb0d6be4507 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:21:02 +0200 Subject: [PATCH 04/16] Fix corner cases related to atexit ordering in PerThreadTable --- offload/include/PerThreadTable.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 1e20b56c734d2..7712cffc308bd 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -33,7 +33,7 @@ template struct PerThreadTable { }; std::mutex Mtx; - std::list ThreadDataList; + std::list> ThreadDataList; // define default constructors, disable copy and move constructors PerThreadTable() = default; @@ -48,8 +48,13 @@ template struct PerThreadTable { private: PerThreadData &getThreadData() { - static thread_local PerThreadData ThData; - return ThData; + static thread_local std::shared_ptr ThData = nullptr; + if (!ThData) { + ThData = std::make_shared(); + std::lock_guard Lock(Mtx); + 
ThreadDataList.push_back(ThData); + } + return *ThData; } protected: @@ -58,8 +63,6 @@ template struct PerThreadTable { if (ThData.ThEntry) return *ThData.ThEntry; ThData.ThEntry = std::make_unique(); - std::lock_guard Lock(Mtx); - ThreadDataList.push_back(&ThData); return *ThData.ThEntry; } @@ -99,6 +102,8 @@ template struct PerThreadTable { template void clear(F f) { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { + if (!ThData->ThEntry || ThData->NElements == 0) + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } >From b72e46a6bd9605b9640cc90a42cf05fe1440e6f1 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 19:31:17 +0200 Subject: [PATCH 05/16] fix format --- offload/include/PerThreadTable.h | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h index 7712cffc308bd..45b196171b4c8 100644 --- a/offload/include/PerThreadTable.h +++ b/offload/include/PerThreadTable.h @@ -103,7 +103,7 @@ template struct PerThreadTable { std::lock_guard Lock(Mtx); for (auto ThData : ThreadDataList) { if (!ThData->ThEntry || ThData->NElements == 0) - continue; + continue; ThData->ThEntry->clear(f); ThData->NElements = 0; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index c6413431b3e13..57be23f10d24d 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -292,8 +292,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported yet. " - "Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported + "yet. 
Ignored\n"); nowait = false; } >From fd69d49e8509161925d03015e5706c36a47b64b2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:17:47 +0200 Subject: [PATCH 06/16] remove unnecessary conditions --- offload/libomptarget/OpenMP/InteropAPI.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 57be23f10d24d..fa6325333c606 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -286,9 +286,6 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { @@ -322,9 +319,6 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; - if (!Interop) - return OFFLOAD_FAIL; - if (Interop->interop_type == kmp_interop_type_targetsync) { if (Ctx->flags.nowait) DP("Warning: nowait flag on interop destroy not supported " @@ -348,11 +342,6 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - if (!Interop) { - DP("Call to %s with invalid interop\n", __func__); - return omp_irc_empty; - } - Interop->addCompletionCb(cb, data); return omp_irc_success; >From fbca38468cd004afb311d5066abcc3c5ca96392e Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 10 Jul 2025 21:52:30 +0200 Subject: [PATCH 07/16] another corner case when unloading --- offload/libomptarget/PluginManager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 2cc1314e7a4f0..f5d913f2b8909 100644 --- 
a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -132,7 +132,8 @@ void PluginManager::initializeAllDevices() { std::atexit([]() { // Interop cleanup should be done before the plugins are deinitialized as // the backend libraries may be already unloaded. - PM->InteropTbl.clear(); + if (PM) + PM->InteropTbl.clear(); }); } >From f5715cdccdbcf60f5ac81d93bff2c08059ef5dd2 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 19:50:01 +0200 Subject: [PATCH 08/16] make version 32bits to simplify codegen --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 61cbedf06a9a6..3662d221e4bd0 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -51,7 +51,7 @@ struct interop_flags_t { }; struct interop_ctx_t { - uint16_t version; // version of the interface (current is 0) + uint32_t version; // version of the interface (current is 0) interop_flags_t flags; int gtid; }; >From 82fa72d175aa98a8983cc0365756aaa61a51a9c3 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Mon, 14 Jul 2025 23:50:42 +0200 Subject: [PATCH 09/16] Fix sporadic race condition with helper threads on deinit --- offload/include/PluginManager.h | 1 + offload/libomptarget/OffloadRTL.cpp | 6 ++++++ offload/libomptarget/OpenMP/API.cpp | 9 ++++++++- offload/libomptarget/OpenMP/InteropAPI.cpp | 4 ++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index ea1f3b6406ce7..6c6fdebe76dff 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -191,4 +191,5 @@ void deinitRuntime(); extern PluginManager *PM; extern std::atomic RTLAlive; // Indicates if the RTL has been initialized +extern std::atomic RTLOngoingSyncs; // Counts ongoing external syncs #endif // OMPTARGET_PLUGIN_MANAGER_H diff --git 
a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp index 134ab7c95ac0b..04bd21ec91a49 100644 --- a/offload/libomptarget/OffloadRTL.cpp +++ b/offload/libomptarget/OffloadRTL.cpp @@ -23,6 +23,7 @@ extern void llvm::omp::target::ompt::connectLibrary(); static std::mutex PluginMtx; static uint32_t RefCount = 0; std::atomic RTLAlive{false}; +std::atomic RTLOngoingSyncs{0}; void initRuntime() { std::scoped_lock Lock(PluginMtx); @@ -56,6 +57,11 @@ void deinitRuntime() { DP("Deinit offload library!\n"); // RTL deinitialization has started RTLAlive = false; + while (RTLOngoingSyncs > 0) { + DP("Waiting for ongoing syncs to finish, count: %d\n", + RTLOngoingSyncs.load()); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } PM->deinit(); delete PM; PM = nullptr; diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index f61f56772504b..bffb92722a057 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -689,9 +689,16 @@ void syncImplicitInterops(int gtid, void *event); // ...) so we can synchronize the necessary objects from the offload side. EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, void *event) { - if (!RTLAlive) return; + RTLOngoingSyncs++; + if (!RTLAlive) { + RTLOngoingSyncs--; + return; + } + syncImplicitInterops(gtid, event); + + RTLOngoingSyncs--; } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index fa6325333c606..b174ec487a93a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -289,8 +289,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { if (nowait) { - DP("Warning: nowait flag on interop use with dependences not supported - "yet. 
Ignored\n"); + DP("Warning: nowait flag on interop use with dependences not supported" + "yet. Ignored\n"); nowait = false; } >From 0c29ac6c62d1da2a4b55a9d20489fccc22db4fc8 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Wed, 16 Jul 2025 19:43:02 +0200 Subject: [PATCH 10/16] change const to constexpr --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 3662d221e4bd0..2fbd6a2035e47 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -152,7 +152,7 @@ struct InteropTableEntry { ContainerTy Interops; - const int reservedEntriesPerThread = + static constexpr int reservedEntriesPerThread = 20; // reserve some entries to avoid reallocation void add(omp_interop_val_t *obj) { >From 0440af027394b21ad89639e61f8925d78a9cd884 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 10:59:56 +0200 Subject: [PATCH 11/16] address review comments --- offload/include/OpenMP/InteropAPI.h | 11 +++++---- offload/libomptarget/OpenMP/API.cpp | 2 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 27 +++++++++++----------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 2fbd6a2035e47..5b3c230900695 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -28,10 +28,9 @@ struct interop_attrs_t { bool inorder : 1; int reserved : 31; - /* Check if the supported attributes are compatible with the current - attributes. Only if an attribute is supported can the value be true, - otherwise it needs to be false - */ + /// Check if the supported attributes are compatible with the current + /// attributes. 
Only if an attribute is supported can the value be true, + /// otherwise it needs to be false bool checkSupportedOnly(interop_attrs_t supported) const { return supported.inorder || (!supported.inorder && !inorder); } @@ -167,7 +166,7 @@ struct InteropTableEntry { } } - /* vector interface */ + /// vector interface int size() const { return Interops.size(); } iterator begin() { return Interops.begin(); } iterator end() { return Interops.end(); } @@ -179,4 +178,6 @@ struct InteropTblTy void clear(); }; +void syncImplicitInterops(int gtid, void *event); + #endif // OMPTARGET_OPENMP_INTEROP_API_H diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index bffb92722a057..b0f0573833713 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -16,6 +16,7 @@ #include "rtl.h" #include "OpenMP/InternalTypes.h" +#include "OpenMP/InteropAPI.h" #include "OpenMP/Mapping.h" #include "OpenMP/OMPT/Interface.h" #include "OpenMP/omp.h" @@ -684,7 +685,6 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return TPR.TargetPointer; } -void syncImplicitInterops(int gtid, void *event); // This routine gets called from the Host RTL at sync points (taskwait, barrier, // ...) so we can synchronize the necessary objects from the offload side. 
EXTERN void __tgt_target_sync(ident_t *loc_ref, int gtid, void *current_task, diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index b174ec487a93a..51d411e5ab49a 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -227,10 +227,9 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) { - [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); DP("Couldn't find device %" PRId64 " while constructing interop object: %s\n", - DeviceNum, ErrStr.c_str()); + DeviceNum, toString(DeviceOrErr.takeError()).c_str()); return omp_interop_none; } auto &Device = *DeviceOrErr; @@ -280,18 +279,18 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, interop_ctx_t *Ctx, dep_pack_t *Deps) { - bool nowait = Ctx->flags.nowait; + bool Nowait = Ctx->flags.nowait; DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__, - DPxPTR(Interop), nowait); + DPxPTR(Interop), Nowait); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return OFFLOAD_FAIL; if (Interop->interop_type == kmp_interop_type_targetsync) { if (Deps) { - if (nowait) { + if (Nowait) { DP("Warning: nowait flag on interop use with dependences not supported" "yet. 
Ignored\n"); - nowait = false; + Nowait = false; } __kmpc_omp_wait_deps(LocRef, Ctx->gtid, Deps->ndeps, Deps->deplist, @@ -300,7 +299,7 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, } if (Interop->async_info && Interop->async_info->Queue) { - if (nowait) + if (Nowait) Interop->asyncBarrier(); else { Interop->flush(); @@ -333,16 +332,16 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, } EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, - ompx_interop_cb_t *cb, - void *data) { + ompx_interop_cb_t *CB, + void *Data) { DP("Call to %s with interop " DPxMOD ", property callback " DPxMOD "and data " DPxMOD "\n", - __func__, DPxPTR(Interop), DPxPTR(cb), DPxPTR(data)); + __func__, DPxPTR(Interop), DPxPTR(CB), DPxPTR(Data)); if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED || !Interop) return omp_irc_other; - Interop->addCompletionCb(cb, data); + Interop->addCompletionCb(CB, Data); return omp_irc_success; } @@ -433,15 +432,15 @@ int32_t omp_interop_val_t::release() { return release(Device); } -void syncImplicitInterops(int gtid, void *event) { +void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; DP("target_sync: syncing interops for gtid %" PRId32 ", event " DPxMOD "\n", - gtid, DPxPTR(event)); + Gtid, DPxPTR(Event)); for (auto iop : PM->InteropTbl) { - if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(gtid) && + if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && !iop->isClean()) { iop->flush(); >From 85eb7a7163f1711b90a20cb08dcdd1175682a3e5 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 11:50:42 +0200 Subject: [PATCH 12/16] Add asserts; Bury virtual interfaces --- .../common/include/PluginInterface.h | 117 ++++++++++++------ 1 file changed, 77 insertions(+), 40 deletions(-) diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h 
b/offload/plugins-nextgen/common/include/PluginInterface.h index 40a428dbccb06..9e16efd37554b 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -60,6 +60,39 @@ struct GenericKernelTy; struct GenericDeviceTy; struct RecordReplayTy; +namespace Plugin { +/// Create a success error. This is the same as calling Error::success(), but +/// it is recommended to use this one for consistency with Plugin::error() and +/// Plugin::check(). +static inline Error success() { return Error::success(); } + +/// Create an Offload error. +template +static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) { + return error::createOffloadError(Code, ErrFmt, Args...); +} + +inline Error error(error::ErrorCode Code, const char *S) { + return make_error(Code, S); +} + +inline Error error(error::ErrorCode Code, Error &&OtherError, + const char *Context) { + return error::createOffloadError(Code, std::move(OtherError), Context); +} + +/// Check the plugin-specific error code and return an error or success +/// accordingly. In case of an error, create a string error with the error +/// description. The ErrFmt should follow the format: +/// "Error in []: %s" +/// The last format specifier "%s" is mandatory and will be used to place the +/// error code's description. Notice this function should be only called from +/// the plugin-specific code. +/// TODO: Refactor this, must be defined individually by each plugin. +template +static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); +} // namespace Plugin + /// Class that wraps the __tgt_async_info to simply its usage. 
In case the /// object is constructed without a valid __tgt_async_info, the object will use /// an internal one and will synchronize the current thread with the pending @@ -1219,6 +1252,20 @@ struct GenericPluginTy { virtual Expected isELFCompatible(uint32_t DeviceID, StringRef Image) const = 0; + virtual Error flushQueueImpl(omp_interop_val_t *Interop) { + return Plugin::success(); + } + + virtual Error syncBarrierImpl(omp_interop_val_t *Interop) { + return Plugin::error(error::ErrorCode::UNSUPPORTED, + "sync_barrier not supported"); + } + + virtual Error asyncBarrierImpl(omp_interop_val_t *Interop) { + return Plugin::error(error::ErrorCode::UNSUPPORTED, + "async_barrier not supported"); + } + protected: /// Indicate whether a device id is valid. bool isValidDeviceId(int32_t DeviceId) const { @@ -1370,31 +1417,54 @@ struct GenericPluginTy { /// Create OpenMP interop with the given interop context omp_interop_val_t *create_interop(int32_t ID, int32_t InteropContext, interop_spec_t *InteropSpec) { + assert(InteropSpec && "Interop spec is null"); auto &Device = getDevice(ID); return Device.createInterop(InteropContext, *InteropSpec); } /// Release OpenMP interop object int32_t release_interop(int32_t ID, omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + assert(Interop->DeviceId == ID && "Interop does not match device id"); auto &Device = getDevice(ID); return Device.releaseInterop(Interop); } /// Flush the queue associated with the interop object if necessary - virtual int32_t flush_queue(omp_interop_val_t *Interop) { + int32_t flush_queue(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = flushQueueImpl(Interop); + if (Err) { + REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } return OFFLOAD_SUCCESS; } - - /// Queue a synchronous barrier in the queue associated with the interop + /// Perform a host synchronization with 
the queue associated with the interop /// object and wait for it to complete. - virtual int32_t sync_barrier(omp_interop_val_t *Interop) { - return OFFLOAD_FAIL; + int32_t sync_barrier(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = syncBarrierImpl(Interop); + if (Err) { + REPORT("Failure to synchronize interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } /// Queue an asynchronous barrier in the queue associated with the interop /// object and return immediately. - virtual int32_t async_barrier(omp_interop_val_t *Interop) { - return OFFLOAD_FAIL; + int32_t async_barrier(omp_interop_val_t *Interop) { + assert(Interop && "Interop is null"); + auto Err = asyncBarrierImpl(Interop); + if (Err) { + REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } private: @@ -1429,39 +1499,6 @@ struct GenericPluginTy { RecordReplayTy *RecordReplay; }; -namespace Plugin { -/// Create a success error. This is the same as calling Error::success(), but -/// it is recommended to use this one for consistency with Plugin::error() and -/// Plugin::check(). -static inline Error success() { return Error::success(); } - -/// Create an Offload error. -template -static Error error(error::ErrorCode Code, const char *ErrFmt, ArgsTy... Args) { - return error::createOffloadError(Code, ErrFmt, Args...); -} - -inline Error error(error::ErrorCode Code, const char *S) { - return make_error(Code, S); -} - -inline Error error(error::ErrorCode Code, Error &&OtherError, - const char *Context) { - return error::createOffloadError(Code, std::move(OtherError), Context); -} - -/// Check the plugin-specific error code and return an error or success -/// accordingly. In case of an error, create a string error with the error -/// description. 
The ErrFmt should follow the format: -/// "Error in []: %s" -/// The last format specifier "%s" is mandatory and will be used to place the -/// error code's description. Notice this function should be only called from -/// the plugin-specific code. -/// TODO: Refactor this, must be defined individually by each plugin. -template -static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); -} // namespace Plugin - /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class /// acts as a reference to a device resource, such as a stream, and requires /// some basic functions to be implemented. The derived class should define an >From 5774a89037b0824f88c0a71b7293d1c71bfc77ab Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 11:56:43 +0200 Subject: [PATCH 13/16] Add error handling to create/releaseInterop --- .../common/include/PluginInterface.h | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 9e16efd37554b..dd565e6f6ca27 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -971,13 +971,13 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } - virtual omp_interop_val_t *createInterop(int32_t InteropType, + virtual Expected createInterop(int32_t InteropType, interop_spec_t &InteropSpec) { return nullptr; } - virtual int32_t releaseInterop(omp_interop_val_t *Interop) { - return OFFLOAD_SUCCESS; + virtual Error releaseInterop(omp_interop_val_t *Interop) { + return Plugin::success(); } virtual interop_spec_t selectInteropPreference(int32_t InteropType, @@ -1419,7 +1419,13 @@ struct GenericPluginTy { interop_spec_t *InteropSpec) { assert(InteropSpec && "Interop spec is null"); auto &Device = getDevice(ID); - 
return Device.createInterop(InteropContext, *InteropSpec); + auto InteropOrErr = Device.createInterop(InteropContext, *InteropSpec); + if (!InteropOrErr) { + REPORT("Failure to create interop object for device " DPxMOD ": %s\n", + DPxPTR(InteropSpec), toString(InteropOrErr.takeError()).c_str()); + return nullptr; + } + return *InteropOrErr; } /// Release OpenMP interop object @@ -1427,7 +1433,13 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); assert(Interop->DeviceId == ID && "Interop does not match device id"); auto &Device = getDevice(ID); - return Device.releaseInterop(Interop); + auto Err = Device.releaseInterop(Interop); + if (Err) { + REPORT("Failure to release interop object " DPxMOD ": %s\n", + DPxPTR(Interop), toString(std::move(Err)).c_str()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; } /// Flush the queue associated with the interop object if necessary >From a1f81c3d92e2bf3208ed8780853634a8e0eaa8ef Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 13:08:56 +0200 Subject: [PATCH 14/16] Remove interfaces with implicity DeviceTy from interop object --- offload/include/OpenMP/InteropAPI.h | 7 +- offload/libomptarget/OpenMP/InteropAPI.cpp | 85 +++++++++++----------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index 5b3c230900695..d23e507f48546 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -108,6 +108,8 @@ typedef struct omp_interop_val_t { clearCompletionCbs(); } + llvm::Expected getDevice() const; + bool hasOwner() const { return OwnerGtid != -1; } void setOwner(int gtid) { OwnerGtid = gtid; } @@ -124,11 +126,6 @@ typedef struct omp_interop_val_t { int32_t async_barrier(DeviceTy &Device); int32_t release(DeviceTy &Device); - int32_t flush(); - int32_t syncBarrier(); - int32_t asyncBarrier(); - int32_t release(); - void addCompletionCb(ompx_interop_cb_t *cb, void *data) { 
CompletionCbs.push_back(omp_interop_cb_instance_t(cb, data)); } diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 51d411e5ab49a..69c6469f9512f 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -298,12 +298,20 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, } } + auto DeviceOrErr = Interop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + return OFFLOAD_FAIL; + } + auto &IOPDevice = *DeviceOrErr; + if (Interop->async_info && Interop->async_info->Queue) { if (Nowait) - Interop->asyncBarrier(); + Interop->async_barrier(IOPDevice); else { - Interop->flush(); - Interop->syncBarrier(); + Interop->flush(IOPDevice); + Interop->sync_barrier(IOPDevice); Interop->markClean(); } } @@ -328,7 +336,14 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, } } - return Interop->release(); + auto DeviceOrErr = Interop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + return OFFLOAD_FAIL; + } + + return Interop->release(*DeviceOrErr); } EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, @@ -348,6 +363,10 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, } // extern "C" +llvm::Expected omp_interop_val_t::getDevice() const { + return PM->getDevice(device_id); +} + bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, const interop_spec_t &Spec) { if (interop_type != InteropType) @@ -394,44 +413,12 @@ int32_t omp_interop_val_t::async_barrier(DeviceTy &Device) { int32_t omp_interop_val_t::release(DeviceTy &Device) { if (async_info != nullptr && (!hasOwner() || !isClean())) { - flush(); - syncBarrier(); + flush(Device); + sync_barrier(Device); } return 
Device.RTL->release_interop(device_id, this); } -int32_t omp_interop_val_t::flush() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return flush(Device); -} - -int32_t omp_interop_val_t::syncBarrier() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return sync_barrier(Device); -} - -int32_t omp_interop_val_t::asyncBarrier() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return async_barrier(Device); -} - -int32_t omp_interop_val_t::release() { - auto DeviceOrErr = PM->getDevice(device_id); - if (!DeviceOrErr) - FATAL_MESSAGE(device_id, "%s", toString(DeviceOrErr.takeError()).c_str()); - DeviceTy &Device = *DeviceOrErr; - return release(Device); -} - void syncImplicitInterops(int Gtid, void *Event) { if (PM->InteropTbl.size() == 0) return; @@ -443,8 +430,16 @@ void syncImplicitInterops(int Gtid, void *Event) { if (iop->async_info && iop->async_info->Queue && iop->isOwnedBy(Gtid) && !iop->isClean()) { - iop->flush(); - iop->syncBarrier(); + auto DeviceOrErr = iop->getDevice(); + if (!DeviceOrErr) { + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(iop), toString(DeviceOrErr.takeError()).c_str()); + continue; + } + auto &IOPDevice = *DeviceOrErr; + + iop->flush(IOPDevice); + iop->sync_barrier(IOPDevice); iop->markClean(); // TODO: Alternate implementation option @@ -464,5 +459,13 @@ void syncImplicitInterops(int Gtid, void *Event) { void InteropTblTy::clear() { DP("Clearing Interop Table\n"); - PerThreadTable::clear([](auto &IOP) { IOP->release(); }); + PerThreadTable::clear([](auto &IOP) { + auto DeviceOrErr = IOP->getDevice(); + if (!DeviceOrErr) 
{ + REPORT("Failed to get device for interop " DPxMOD ": %s\n", + DPxPTR(IOP), toString(DeviceOrErr.takeError()).c_str()); + return; + } + IOP->release(*DeviceOrErr); + }); } >From 6395c457b0c93b30f75350b1edc2a7818842f385 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 13:28:17 +0200 Subject: [PATCH 15/16] Adjust format --- offload/libomptarget/OpenMP/InteropAPI.cpp | 18 +++++++++--------- .../common/include/PluginInterface.h | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index 69c6469f9512f..912dcaf4cdcd5 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -300,8 +300,8 @@ int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop, auto DeviceOrErr = Interop->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(Interop), + toString(DeviceOrErr.takeError()).c_str()); return OFFLOAD_FAIL; } auto &IOPDevice = *DeviceOrErr; @@ -338,8 +338,8 @@ int __tgt_interop_release(ident_t *LocRef, omp_interop_val_t *Interop, auto DeviceOrErr = Interop->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(Interop), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(Interop), + toString(DeviceOrErr.takeError()).c_str()); return OFFLOAD_FAIL; } @@ -364,7 +364,7 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop, } // extern "C" llvm::Expected omp_interop_val_t::getDevice() const { - return PM->getDevice(device_id); + return PM->getDevice(device_id); } bool omp_interop_val_t::isCompatibleWith(int32_t InteropType, @@ -432,8 +432,8 @@ void syncImplicitInterops(int Gtid, void 
*Event) { auto DeviceOrErr = iop->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(iop), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(iop), + toString(DeviceOrErr.takeError()).c_str()); continue; } auto &IOPDevice = *DeviceOrErr; @@ -462,8 +462,8 @@ void InteropTblTy::clear() { PerThreadTable::clear([](auto &IOP) { auto DeviceOrErr = IOP->getDevice(); if (!DeviceOrErr) { - REPORT("Failed to get device for interop " DPxMOD ": %s\n", - DPxPTR(IOP), toString(DeviceOrErr.takeError()).c_str()); + REPORT("Failed to get device for interop " DPxMOD ": %s\n", DPxPTR(IOP), + toString(DeviceOrErr.takeError()).c_str()); return; } IOP->release(*DeviceOrErr); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index dd565e6f6ca27..d6a3fb95b1df0 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -971,8 +971,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } - virtual Expected createInterop(int32_t InteropType, - interop_spec_t &InteropSpec) { + virtual Expected + createInterop(int32_t InteropType, interop_spec_t &InteropSpec) { return nullptr; } @@ -1447,7 +1447,7 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); auto Err = flushQueueImpl(Interop); if (Err) { - REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", + REPORT("Failure to flush interop object " DPxMOD " queue: %s\n", DPxPTR(Interop), toString(std::move(Err)).c_str()); return OFFLOAD_FAIL; } @@ -1459,7 +1459,7 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); auto Err = syncBarrierImpl(Interop); if (Err) { - REPORT("Failure to synchronize interop object " DPxMOD ": %s\n", + REPORT("Failure to synchronize interop 
object " DPxMOD ": %s\n", DPxPTR(Interop), toString(std::move(Err)).c_str()); return OFFLOAD_FAIL; } @@ -1472,7 +1472,7 @@ struct GenericPluginTy { assert(Interop && "Interop is null"); auto Err = asyncBarrierImpl(Interop); if (Err) { - REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", + REPORT("Failure to queue barrier in interop object " DPxMOD ": %s\n", DPxPTR(Interop), toString(std::move(Err)).c_str()); return OFFLOAD_FAIL; } >From e4827f6a172c02c5889b016aab4a004f8bc70fe0 Mon Sep 17 00:00:00 2001 From: Alex Duran Date: Thu, 17 Jul 2025 21:34:28 +0200 Subject: [PATCH 16/16] reorder dep_pack fields --- offload/include/OpenMP/InteropAPI.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h index d23e507f48546..9989c01cec26d 100644 --- a/offload/include/OpenMP/InteropAPI.h +++ b/offload/include/OpenMP/InteropAPI.h @@ -57,8 +57,8 @@ struct interop_ctx_t { struct dep_pack_t { int32_t ndeps; - kmp_depend_info_t *deplist; int32_t ndeps_noalias; + kmp_depend_info_t *deplist; kmp_depend_info_t *noalias_deplist; }; From openmp-commits at lists.llvm.org Thu Jul 17 15:57:28 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Thu, 17 Jul 2025 15:57:28 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <68797fd8.a70a0220.8cf23.02e9@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/6] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + 
clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => 
cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. 
std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading 
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location.
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/6] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 3/6] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 4/6] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang.
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 5/6] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 6/6] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () From openmp-commits at lists.llvm.org Thu Jul 17 23:20:19 2025 From: openmp-commits at lists.llvm.org (Roger Ferrer Ibáñez via Openmp-commits) Date: Thu, 17 Jul 2025 23:20:19 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <6879e7a3.050a0220.3931cc.1598@mx.google.com> ================ @@ -1143,6 +1143,83 @@ class OMPFullClause final : public OMPNoChildClause<llvm::omp::OMPC_full> { static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) +/// \endcode +class OMPLoopRangeClause final + : public OMPClause { + friend class OMPClauseReader; + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; + + /// Number of looprange arguments (always 2: first, count) + static constexpr unsigned NumArgs = 2; + Stmt *Args[NumArgs] = {nullptr, nullptr}; + + /// Set looprange 'first' expression + void setFirst(Expr *E) { Args[0] = E; } + + /// Set looprange 'count' expression + void
setCount(Expr *E) { Args[1] = E; } + + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}) {} + +public: + /// Build a 'looprange' clause AST node. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, Expr* First, Expr* Count); ---------------- rofirrim wrote: `clang-format` from LLVM 20 seems to be OK with this :thinking: https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Fri Jul 18 00:57:40 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Fri, 18 Jul 2025 00:57:40 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <6879fe74.170a0220.8c112.ce7d@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/7] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- 
.../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => 
flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional<std::string> getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists.
std::optional<std::string> getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional<std::string> IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional<std::string> ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional<std::string> ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir are installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/7] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 3/7] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 4/7] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 5/7] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 6/7] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 7/7] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) From openmp-commits at lists.llvm.org Fri Jul 18 05:37:07 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Fri, 18 Jul 2025 05:37:07 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687a3ff3.170a0220.35ded4.302d@mx.google.com> https://github.com/Meinersbur 
updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/8] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 
+++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ 
b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. 
std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading 
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/8] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 3/8] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 4/8] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 5/8] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 6/8] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 7/8] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 8/8] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git 
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir From openmp-commits at lists.llvm.org Fri Jul 18 08:02:45 2025 From: openmp-commits at lists.llvm.org (Brad Smith via Openmp-commits) Date: Fri, 18 Jul 2025 08:02:45 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Simplify GNU strerror_r check for Android (PR #148990) In-Reply-To: Message-ID: <687a6215.170a0220.ca055.9c66@mx.google.com> ================ @@ -708,9 +708,7 @@ static char *sys_error(int err) { int strerror_r( int, char *, size_t ); // XSI version */ -#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || \ - (defined(__BIONIC__) && defined(_GNU_SOURCE) && \ - __ANDROID_API__ >= __ANDROID_API_M__) ---------------- brad0 wrote: > openmp is distributed as part of the NDK, so i think it's too early to drop apis 21 and 
22. > > @pirama-arumuga-nainar though, because for all i know we already only build openmp at a higher api level, so it's already broken. Ok, I can leave it for now. No rush. Thanks. https://github.com/llvm/llvm-project/pull/148990 From openmp-commits at lists.llvm.org Fri Jul 18 09:03:15 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Fri, 18 Jul 2025 09:03:15 -0700 (PDT) Subject: [Openmp-commits] [openmp] c244c3b - [OpenMP] [NFC] Remove dead code: building task stack (#143589) Message-ID: <687a7043.170a0220.aa548.a97e@mx.google.com> Author: Jonathan Peyton Date: 2025-07-18T12:03:12-04:00 New Revision: c244c3b2d95a1605337b1156fad412ee2c9cd8c9 URL: https://github.com/llvm/llvm-project/commit/c244c3b2d95a1605337b1156fad412ee2c9cd8c9 DIFF: https://github.com/llvm/llvm-project/commit/c244c3b2d95a1605337b1156fad412ee2c9cd8c9.diff LOG: [OpenMP] [NFC] Remove dead code: building task stack (#143589) This code hasn't been enabled since the first code changes were introduced. Remove the dead code. 
Added: Modified: openmp/runtime/src/kmp.h openmp/runtime/src/kmp_tasking.cpp Removed: ################################################################################ diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index f62cabee6ea84..307dc625a67e9 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -34,15 +34,6 @@ #define TASK_CURRENT_NOT_QUEUED 0 #define TASK_CURRENT_QUEUED 1 -#ifdef BUILD_TIED_TASK_STACK -#define TASK_STACK_EMPTY 0 // entries when the stack is empty -#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK -// Number of entries in each task stack array -#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS) -// Mask for determining index into stack block -#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1) -#endif // BUILD_TIED_TASK_STACK - #define TASK_NOT_PUSHED 1 #define TASK_SUCCESSFULLY_PUSHED 0 #define TASK_TIED 1 @@ -2704,23 +2695,6 @@ extern std::atomic __kmp_tdg_task_id; extern kmp_int32 __kmp_num_tdg; #endif -#ifdef BUILD_TIED_TASK_STACK - -/* Tied Task stack definitions */ -typedef struct kmp_stack_block { - kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE]; - struct kmp_stack_block *sb_next; - struct kmp_stack_block *sb_prev; -} kmp_stack_block_t; - -typedef struct kmp_task_stack { - kmp_stack_block_t ts_first_block; // first block of stack entries - kmp_taskdata_t **ts_top; // pointer to the top of stack - kmp_int32 ts_entries; // number of entries on the stack -} kmp_task_stack_t; - -#endif // BUILD_TIED_TASK_STACK - typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) /* Same fields as in the #else branch, but in reverse order */ @@ -2863,10 +2837,6 @@ typedef struct kmp_base_thread_data { kmp_int32 td_deque_ntasks; // Number of tasks in deque // GEH: shouldn't this be volatile since used in while-spin? 
kmp_int32 td_deque_last_stolen; // Thread number of last successful steal -#ifdef BUILD_TIED_TASK_STACK - kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task -// scheduling constraint -#endif // BUILD_TIED_TASK_STACK } kmp_base_thread_data_t; #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index d7bc4922d54f7..e4d92a78fd6b9 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -42,221 +42,6 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id); int __kmp_taskloop_task(int gtid, void *ptask); #endif -#ifdef BUILD_TIED_TASK_STACK - -// __kmp_trace_task_stack: print the tied tasks from the task stack in order -// from top do bottom -// -// gtid: global thread identifier for thread containing stack -// thread_data: thread data for task team thread containing stack -// threshold: value above which the trace statement triggers -// location: string identifying call site of this function (for trace) -static void __kmp_trace_task_stack(kmp_int32 gtid, - kmp_thread_data_t *thread_data, - int threshold, char *location) { - kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; - kmp_taskdata_t **stack_top = task_stack->ts_top; - kmp_int32 entries = task_stack->ts_entries; - kmp_taskdata_t *tied_task; - - KA_TRACE( - threshold, - ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, " - "first_block = %p, stack_top = %p \n", - location, gtid, entries, task_stack->ts_first_block, stack_top)); - - KMP_DEBUG_ASSERT(stack_top != NULL); - KMP_DEBUG_ASSERT(entries > 0); - - while (entries != 0) { - KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]); - // fix up ts_top if we need to pop from previous block - if (entries & TASK_STACK_INDEX_MASK == 0) { - kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top); - - stack_block = 
stack_block->sb_prev; - stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; - } - - // finish bookkeeping - stack_top--; - entries--; - - tied_task = *stack_top; - - KMP_DEBUG_ASSERT(tied_task != NULL); - KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); - - KA_TRACE(threshold, - ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, " - "stack_top=%p, tied_task=%p\n", - location, gtid, entries, stack_top, tied_task)); - } - KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]); - - KA_TRACE(threshold, - ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", - location, gtid)); -} - -// __kmp_init_task_stack: initialize the task stack for the first time -// after a thread_data structure is created. -// It should not be necessary to do this again (assuming the stack works). -// -// gtid: global thread identifier of calling thread -// thread_data: thread data for task team thread containing stack -static void __kmp_init_task_stack(kmp_int32 gtid, - kmp_thread_data_t *thread_data) { - kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; - kmp_stack_block_t *first_block; - - // set up the first block of the stack - first_block = &task_stack->ts_first_block; - task_stack->ts_top = (kmp_taskdata_t **)first_block; - memset((void *)first_block, '\0', - TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *)); - - // initialize the stack to be empty - task_stack->ts_entries = TASK_STACK_EMPTY; - first_block->sb_next = NULL; - first_block->sb_prev = NULL; -} - -// __kmp_free_task_stack: free the task stack when thread_data is destroyed. 
-// -// gtid: global thread identifier for calling thread -// thread_data: thread info for thread containing stack -static void __kmp_free_task_stack(kmp_int32 gtid, - kmp_thread_data_t *thread_data) { - kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; - kmp_stack_block_t *stack_block = &task_stack->ts_first_block; - - KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY); - // free from the second block of the stack - while (stack_block != NULL) { - kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL; - - stack_block->sb_next = NULL; - stack_block->sb_prev = NULL; - if (stack_block != &task_stack->ts_first_block) { - __kmp_thread_free(thread, - stack_block); // free the block, if not the first - } - stack_block = next_block; - } - // initialize the stack to be empty - task_stack->ts_entries = 0; - task_stack->ts_top = NULL; -} - -// __kmp_push_task_stack: Push the tied task onto the task stack. -// Grow the stack if necessary by allocating another block. 
-// -// gtid: global thread identifier for calling thread -// thread: thread info for thread containing stack -// tied_task: the task to push on the stack -static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread, - kmp_taskdata_t *tied_task) { - // GEH - need to consider what to do if tt_threads_data not allocated yet - kmp_thread_data_t *thread_data = - &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; - kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; - - if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) { - return; // Don't push anything on stack if team or team tasks are serialized - } - - KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); - KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); - - KA_TRACE(20, - ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n", - gtid, thread, tied_task)); - // Store entry - *(task_stack->ts_top) = tied_task; - - // Do bookkeeping for next push - task_stack->ts_top++; - task_stack->ts_entries++; - - if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) { - // Find beginning of this task block - kmp_stack_block_t *stack_block = - (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE); - - // Check if we already have a block - if (stack_block->sb_next != - NULL) { // reset ts_top to beginning of next block - task_stack->ts_top = &stack_block->sb_next->sb_block[0]; - } else { // Alloc new block and link it up - kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc( - thread, sizeof(kmp_stack_block_t)); - - task_stack->ts_top = &new_block->sb_block[0]; - stack_block->sb_next = new_block; - new_block->sb_prev = stack_block; - new_block->sb_next = NULL; - - KA_TRACE( - 30, - ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n", - gtid, tied_task, new_block)); - } - } - KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, - tied_task)); -} - -// __kmp_pop_task_stack: Pop 
the tied task from the task stack. Don't return -// the task, just check to make sure it matches the ending task passed in. -// -// gtid: global thread identifier for the calling thread -// thread: thread info structure containing stack -// tied_task: the task popped off the stack -// ending_task: the task that is ending (should match popped task) -static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread, - kmp_taskdata_t *ending_task) { - // GEH - need to consider what to do if tt_threads_data not allocated yet - kmp_thread_data_t *thread_data = - &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)]; - kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; - kmp_taskdata_t *tied_task; - - if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) { - // Don't pop anything from stack if team or team tasks are serialized - return; - } - - KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); - KMP_DEBUG_ASSERT(task_stack->ts_entries > 0); - - KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, - thread)); - - // fix up ts_top if we need to pop from previous block - if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) { - kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top); - - stack_block = stack_block->sb_prev; - task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; - } - - // finish bookkeeping - task_stack->ts_top--; - task_stack->ts_entries--; - - tied_task = *(task_stack->ts_top); - - KMP_DEBUG_ASSERT(tied_task != NULL); - KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); - KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly - - KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, - tied_task)); - return; -} -#endif /* BUILD_TIED_TASK_STACK */ - // returns 1 if new task is allowed to execute, 0 otherwise // checks Task Scheduling constraint (if requested) and // mutexinoutset dependencies if 
any @@ -683,13 +468,6 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); current_task->td_flags.executing = 0; -// Add task to stack if tied -#ifdef BUILD_TIED_TASK_STACK - if (taskdata->td_flags.tiedness == TASK_TIED) { - __kmp_push_task_stack(gtid, thread, taskdata); - } -#endif /* BUILD_TIED_TASK_STACK */ - // mark starting task as executing and as current task thread->th.th_current_task = taskdata; @@ -1041,13 +819,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, is_taskgraph = taskdata->is_taskgraph; #endif -// Pop task from stack if tied -#ifdef BUILD_TIED_TASK_STACK - if (taskdata->td_flags.tiedness == TASK_TIED) { - __kmp_pop_task_stack(gtid, thread, taskdata); - } -#endif /* BUILD_TIED_TASK_STACK */ - if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) { // untied task needs to check the counter so that the task structure is not // freed prematurely @@ -3786,13 +3557,6 @@ static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) { thread_data->td.td_deque = NULL; __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); } - -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out what to do here for td_susp_tied_tasks - if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) { - __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data); - } -#endif // BUILD_TIED_TASK_STACK } // __kmp_realloc_task_threads_data: @@ -3849,14 +3613,7 @@ static int __kmp_realloc_task_threads_data(kmp_info_t *thread, KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t), (void *)old_data, maxthreads * sizeof(kmp_thread_data_t)); -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = maxthreads; i < nthreads; i++) { - kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; - __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); - } -#endif // BUILD_TIED_TASK_STACK - // Install the 
new data and free the old data + // Install the new data and free the old data (*threads_data_p) = new_data; __kmp_free(old_data); } else { @@ -3868,13 +3625,6 @@ static int __kmp_realloc_task_threads_data(kmp_info_t *thread, // kmp_reap_task_team( ). *threads_data_p = (kmp_thread_data_t *)__kmp_allocate( nthreads * sizeof(kmp_thread_data_t)); -#ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; - __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); - } -#endif // BUILD_TIED_TASK_STACK } task_team->tt.tt_max_threads = nthreads; } else { From openmp-commits at lists.llvm.org Fri Jul 18 09:05:29 2025 From: openmp-commits at lists.llvm.org (Terry Wilmarth via Openmp-commits) Date: Fri, 18 Jul 2025 09:05:29 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Improve performance of ticket lock (x86) (PR #143557) In-Reply-To: Message-ID: <687a70c9.170a0220.2c86d1.abab@mx.google.com> TerryLWilmarth wrote: ping. @hansangbae please review and complete this. Thanks! https://github.com/llvm/llvm-project/pull/143557 From openmp-commits at lists.llvm.org Fri Jul 18 09:07:26 2025 From: openmp-commits at lists.llvm.org (Terry Wilmarth via Openmp-commits) Date: Fri, 18 Jul 2025 09:07:26 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] [NFC] Remove KMP_NESTED_HOT_TEAMS macro (PR #143584) In-Reply-To: Message-ID: <687a713e.170a0220.29eae2.af64@mx.google.com> https://github.com/TerryLWilmarth approved this pull request. 
LGTM https://github.com/llvm/llvm-project/pull/143584 From openmp-commits at lists.llvm.org Fri Jul 18 09:03:18 2025 From: openmp-commits at lists.llvm.org (Terry Wilmarth via Openmp-commits) Date: Fri, 18 Jul 2025 09:03:18 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] [NFC] Remove dead code: building task stack (PR #143589) In-Reply-To: Message-ID: <687a7046.a70a0220.317c8.a5fc@mx.google.com> https://github.com/TerryLWilmarth closed https://github.com/llvm/llvm-project/pull/143589 From openmp-commits at lists.llvm.org Fri Jul 18 09:09:56 2025 From: openmp-commits at lists.llvm.org (Terry Wilmarth via Openmp-commits) Date: Fri, 18 Jul 2025 09:09:56 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Fixup bugs found during fuzz testing (PR #143455) In-Reply-To: Message-ID: <687a71d4.a70a0220.20cf45.ab80@mx.google.com> https://github.com/TerryLWilmarth approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/143455 From openmp-commits at lists.llvm.org Fri Jul 18 12:52:24 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Fri, 18 Jul 2025 12:52:24 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687aa5f8.050a0220.179512.d28d@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 1/9] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ 
flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => 
flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. 
std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading 
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not an solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location.
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 2/9] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver.
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 3/9] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 4/9] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 5/9] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 6/9] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 7/9] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 8/9] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git 
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 8ec6d03939c3a7e2f79bb3ea5911b7607028dd7c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 21:52:02 +0200 Subject: [PATCH 9/9] Also add dependency barrier for WIN32 --- flang-rt/lib/runtime/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 1b8114c102205..20f5d84bb2b69 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -281,14 +281,18 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") - add_flangrt_library(${name}.intrinsics OBJECT + add_flangrt_library(${name}.intrinsics.obj OBJECT 
${intrinsics_sources} ) + add_custom_target(${name}.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) add_flangrt_library(${name} ${libtype} - ${sources} $ + ${sources} $ ${ARGN} - LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -311,8 +315,9 @@ else() endif () get_target_property(compile_target ${name}.compile ALIASED_TARGET) - flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) + add_dependencies(${compile_target} ${name}.intrinsics) add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") From openmp-commits at lists.llvm.org Fri Jul 18 14:16:40 2025 From: openmp-commits at lists.llvm.org (Alexey Bataev via Openmp-commits) Date: Fri, 18 Jul 2025 14:16:40 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Clang][OpenMP][LoopTransformations] Add support for "#pragma omp fuse" loop transformation directive and "looprange" clause (PR #139293) In-Reply-To: Message-ID: <687ab9b8.630a0220.2b5166.d465@mx.google.com> alexey-bataev wrote: Need a rebase https://github.com/llvm/llvm-project/pull/139293 From openmp-commits at lists.llvm.org Sat Jul 19 04:17:07 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Sat, 19 Jul 2025 04:17:07 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687b7eb3.a70a0220.31f104.ed48@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 
14:09:57 +0200 Subject: [PATCH 01/11] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ 
flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// 
the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional<std::string> getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists.
std::optional<std::string> getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional<std::string> IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional<std::string> ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional<std::string> ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib/<triple>/libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt.
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the module files if a Fortran compiler is available. +# Only LLVM_ENABLE_RUNTIMES=openmp is supported; LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag)
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 02/11] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 03/11] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 04/11] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 05/11] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 06/11] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 07/11] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 08/11] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git 
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 8ec6d03939c3a7e2f79bb3ea5911b7607028dd7c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 21:52:02 +0200 Subject: [PATCH 09/11] Also add dependency barrier for WIN32 --- flang-rt/lib/runtime/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 1b8114c102205..20f5d84bb2b69 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -281,14 +281,18 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") - add_flangrt_library(${name}.intrinsics OBJECT + add_flangrt_library(${name}.intrinsics.obj OBJECT 
${intrinsics_sources} ) + add_custom_target(${name}.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) add_flangrt_library(${name} ${libtype} - ${sources} $ + ${sources} $ ${ARGN} - LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -311,8 +315,9 @@ else() endif () get_target_property(compile_target ${name}.compile ALIASED_TARGET) - flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) + add_dependencies(${compile_target} ${name}.intrinsics) add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") >From 580a0c56535a9cd1c65fd5a5e6309b73c36ae8b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:05:34 +0200 Subject: [PATCH 10/11] Dependency barrier test --- flang-rt/lib/runtime/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 20f5d84bb2b69..bdd4318832473 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,6 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics + COMMAND echo "Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) @@ -295,6 +296,7 @@ else() LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) + get_target_property(compile_target ${name}.compile ALIASED_TARGET) if (msvc_lib) set_target_properties(${name} @@ -311,14 +313,13 @@ else() set(is_public "") else () set(is_public PUBLIC) + add_dependencies(flang-rt-mod ${name}.intrinsics 
${compile_target}) set(_has_public_intrinsics "YES" PARENT_SCOPE) endif () - get_target_property(compile_target ${name}.compile ALIASED_TARGET) flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) add_dependencies(${compile_target} ${name}.intrinsics) - add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") >From 979691a5bba4888be8c7c82d1bed4e8cdc71fff9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:06:40 +0200 Subject: [PATCH 11/11] Dependency barrier info --- flang-rt/lib/runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index bdd4318832473..24b84b11f9513 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,7 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics - COMMAND echo "Dependency barrier" + COMMAND echo "${name} Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) From openmp-commits at lists.llvm.org Sat Jul 19 05:32:04 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Sat, 19 Jul 2025 05:32:04 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687b9044.170a0220.2f342b.fe74@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Sat Jul 19 05:32:34 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Sat, 19 Jul 2025 05:32:34 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod 
generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687b9062.170a0220.38494f.fbc2@mx.google.com> https://github.com/Meinersbur ready_for_review https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Sun Jul 20 08:39:59 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Sun, 20 Jul 2025 08:39:59 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Improve performance of ticket lock (x86) (PR #143557) In-Reply-To: Message-ID: <687d0dcf.170a0220.1571ed.2976@mx.google.com> https://github.com/jprotze approved this pull request. I think, having the KMP_YIELD macro in the commit message would make it easier to understand the actual problem when just looking at the diff of the patch. After checking the define the issue is clear and the patch LGTM. ``` C #define KMP_YIELD(cond) \ { \ KMP_CPU_PAUSE(); \ if ((cond) && (KMP_TRY_YIELD)) \ __kmp_yield(); \ } ``` https://github.com/llvm/llvm-project/pull/143557 From openmp-commits at lists.llvm.org Sun Jul 20 22:09:58 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Sun, 20 Jul 2025 22:09:58 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <687dcba6.630a0220.d9f34.3850@mx.google.com> ================ @@ -34,10 +87,96 @@ typedef struct omp_interop_val_t { __tgt_device_info device_info; const kmp_interop_type_t interop_type; const intptr_t device_id; - const omp_foreign_runtime_ids_t vendor_id = cuda; - const intptr_t backend_type_id = omp_interop_backend_type_cuda_1; + omp_vendor_id_t vendor_id = omp_vendor_llvm; + omp_foreign_runtime_id_t fr_id = omp_fr_none; + interop_attrs_t attrs{false, 0}; // Common prefer specification attributes + int64_t impl_attrs = 0; // Implementation prefer specification attributes ---------------- kevinsala wrote: I'd probably rename the fields to xxx_yyy then. 
https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Sun Jul 20 22:17:04 2025 From: openmp-commits at lists.llvm.org (Kevin Sala Penades via Openmp-commits) Date: Sun, 20 Jul 2025 22:17:04 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OFFLOAD][OPENMP] 6.0 compatible interop interface (PR #143491) In-Reply-To: Message-ID: <687dcd50.170a0220.3b9f3f.d338@mx.google.com> ================ @@ -80,15 +80,18 @@ typedef enum omp_interop_rc { omp_irc_other = -6 } omp_interop_rc_t; -typedef enum omp_interop_fr { - omp_ifr_cuda = 1, - omp_ifr_cuda_driver = 2, - omp_ifr_opencl = 3, - omp_ifr_sycl = 4, - omp_ifr_hip = 5, - omp_ifr_level_zero = 6, - omp_ifr_last = 7 -} omp_interop_fr_t; ---------------- kevinsala wrote: Renaming the prefix to `tgt_` sounds good. https://github.com/llvm/llvm-project/pull/143491 From openmp-commits at lists.llvm.org Mon Jul 21 06:26:24 2025 From: openmp-commits at lists.llvm.org (Hansang Bae via Openmp-commits) Date: Mon, 21 Jul 2025 06:26:24 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Improve performance of ticket lock (x86) (PR #143557) In-Reply-To: Message-ID: <687e4000.630a0220.adcf9.915d@mx.google.com> https://github.com/hansangbae edited https://github.com/llvm/llvm-project/pull/143557 From openmp-commits at lists.llvm.org Mon Jul 21 06:26:40 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 21 Jul 2025 06:26:40 -0700 (PDT) Subject: [Openmp-commits] [openmp] c9fe19a - [OpenMP] Improve performance of ticket lock (x86) (#143557) Message-ID: <687e4010.170a0220.3c6f5f.aa96@mx.google.com> Author: Jonathan Peyton Date: 2025-07-21T08:26:35-05:00 New Revision: c9fe19a99bf41c165524dcb3e9ff939527b5178b URL: https://github.com/llvm/llvm-project/commit/c9fe19a99bf41c165524dcb3e9ff939527b5178b DIFF: https://github.com/llvm/llvm-project/commit/c9fe19a99bf41c165524dcb3e9ff939527b5178b.diff LOG: [OpenMP] Improve performance of ticket lock (x86) (#143557) Ticket lock has a yield 
operation (shown below) which degrades performance on larger server machines due to an unconditional pause operation. ``` #define KMP_YIELD(cond) \ { \ KMP_CPU_PAUSE(); \ if ((cond) && (KMP_TRY_YIELD)) \ __kmp_yield(); \ } ``` Added: Modified: openmp/runtime/src/kmp_lock.cpp Removed: ################################################################################ diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 11fa233c4bd27..47dcc806e20d1 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -712,16 +712,9 @@ static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, } int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { - kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, - std::memory_order_relaxed) - - std::atomic_load_explicit(&lck->lk.now_serving, - std::memory_order_relaxed); - std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, std::memory_order_release); - KMP_YIELD(distance > - (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); return KMP_LOCK_RELEASED; } From openmp-commits at lists.llvm.org Mon Jul 21 06:26:42 2025 From: openmp-commits at lists.llvm.org (Hansang Bae via Openmp-commits) Date: Mon, 21 Jul 2025 06:26:42 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Improve performance of ticket lock (x86) (PR #143557) In-Reply-To: Message-ID: <687e4012.170a0220.31d4f2.9c9b@mx.google.com> https://github.com/hansangbae closed https://github.com/llvm/llvm-project/pull/143557 From openmp-commits at lists.llvm.org Mon Jul 21 06:27:41 2025 From: openmp-commits at lists.llvm.org (Hansang Bae via Openmp-commits) Date: Mon, 21 Jul 2025 06:27:41 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Improve performance of ticket lock (x86) (PR #143557) In-Reply-To: Message-ID: <687e404d.050a0220.142e18.877e@mx.google.com> hansangbae wrote: Merged the change with the suggested commit message. Thanks! 
https://github.com/llvm/llvm-project/pull/143557 From openmp-commits at lists.llvm.org Mon Jul 21 12:48:15 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 12:48:15 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687e997f.050a0220.845a8.fb17@mx.google.com> ================ @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty ---------------- Meinersbur wrote: With this PR, the default intrinsic path is now determined by the driver, not by the frontend. To emulate driver behavior, the test needs to add the default intrinsics path manually. Alternatively, this test could invoke the driver instead. It's in the `test/Driver` directory after all. 
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 12:56:48 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 12:56:48 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687e9b80.170a0220.3552ac.000b@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 01/12] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- 
flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename 
{flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. 
llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs 
must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. -get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt<std::string> moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double.
-check_cxx_source_compiles( - "#include <cfloat> - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends}
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not an solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location.
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 02/12] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver.
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 03/12] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 04/12] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang.
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 05/12] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 06/12] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 07/12] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 08/12] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...)
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - "<CMAKE_Fortran_COMPILER> -cpp <DEFINES> <INCLUDES> <FLAGS> -E <SOURCE> > <PREPROCESSED_SOURCE>") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 8ec6d03939c3a7e2f79bb3ea5911b7607028dd7c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 21:52:02 +0200 Subject: [PATCH 09/12] Also add dependency barrier for WIN32 --- flang-rt/lib/runtime/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 1b8114c102205..20f5d84bb2b69 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -281,14 +281,18 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") - add_flangrt_library(${name}.intrinsics OBJECT + add_flangrt_library(${name}.intrinsics.obj OBJECT 
${intrinsics_sources} ) + add_custom_target(${name}.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) add_flangrt_library(${name} ${libtype} - ${sources} $ + ${sources} $ ${ARGN} - LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -311,8 +315,9 @@ else() endif () get_target_property(compile_target ${name}.compile ALIASED_TARGET) - flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) + add_dependencies(${compile_target} ${name}.intrinsics) add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") >From 580a0c56535a9cd1c65fd5a5e6309b73c36ae8b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:05:34 +0200 Subject: [PATCH 10/12] Dependency barrier test --- flang-rt/lib/runtime/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 20f5d84bb2b69..bdd4318832473 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,6 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics + COMMAND echo "Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) @@ -295,6 +296,7 @@ else() LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) + get_target_property(compile_target ${name}.compile ALIASED_TARGET) if (msvc_lib) set_target_properties(${name} @@ -311,14 +313,13 @@ else() set(is_public "") else () set(is_public PUBLIC) + add_dependencies(flang-rt-mod ${name}.intrinsics 
${compile_target}) set(_has_public_intrinsics "YES" PARENT_SCOPE) endif () - get_target_property(compile_target ${name}.compile ALIASED_TARGET) flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) add_dependencies(${compile_target} ${name}.intrinsics) - add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") >From 979691a5bba4888be8c7c82d1bed4e8cdc71fff9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:06:40 +0200 Subject: [PATCH 11/12] Dependency barrier info --- flang-rt/lib/runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index bdd4318832473..24b84b11f9513 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,7 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics - COMMAND echo "Dependency barrier" + COMMAND echo "${name} Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) >From b4adeab58e615b9059c0a6e5bbcb376d1fe21bb2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 21 Jul 2025 21:56:26 +0200 Subject: [PATCH 12/12] Avoid unrelated changes --- clang/include/clang/Driver/Driver.h | 1 + flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 9343fed36b6ac..14e1e644e51aa 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,6 +403,7 @@ class Driver { SmallString<128> &CrashDiagDir); public: + /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. 
/// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 688b5aacc4bcd..609ba27bc212b 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1376,10 +1376,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError( - loc, llvm::Twine("runtime derived type info descriptor of '") + name + - "' was not generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError(loc, + "runtime derived type info descriptor was not " + "generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition From openmp-commits at lists.llvm.org Mon Jul 21 12:58:44 2025 From: openmp-commits at lists.llvm.org (Slava Zakharin via Openmp-commits) Date: Mon, 21 Jul 2025 12:58:44 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687e9bf4.050a0220.3c93d6.048d@mx.google.com> ================ @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty ---------------- vzakhari wrote: Oh, I see. Thanks! 
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 14:49:11 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 21 Jul 2025 14:49:11 -0700 (PDT) Subject: [Openmp-commits] [openmp] de011e3 - [OpenMP] [NFC] Remove KMP_NESTED_HOT_TEAMS macro (#143584) Message-ID: <687eb5d7.170a0220.db915.c71c@mx.google.com> Author: Jonathan Peyton Date: 2025-07-21T17:49:07-04:00 New Revision: de011e372dff540056b4abdf02de94061f5ddb86 URL: https://github.com/llvm/llvm-project/commit/de011e372dff540056b4abdf02de94061f5ddb86 DIFF: https://github.com/llvm/llvm-project/commit/de011e372dff540056b4abdf02de94061f5ddb86.diff LOG: [OpenMP] [NFC] Remove KMP_NESTED_HOT_TEAMS macro (#143584) The feature was introduced back in 2014 and has been on ever since. Leave the feature in place. Removing only the macro. Added: Modified: openmp/runtime/src/kmp.h openmp/runtime/src/kmp_config.h.cmake openmp/runtime/src/kmp_global.cpp openmp/runtime/src/kmp_runtime.cpp openmp/runtime/src/kmp_settings.cpp Removed: ################################################################################ diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 307dc625a67e9..818edf9060ad1 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -160,17 +160,6 @@ class kmp_stats_list; #define USE_FAST_MEMORY 3 #endif -#ifndef KMP_NESTED_HOT_TEAMS -#define KMP_NESTED_HOT_TEAMS 0 -#define USE_NESTED_HOT_ARG(x) -#else -#if KMP_NESTED_HOT_TEAMS -#define USE_NESTED_HOT_ARG(x) , x -#else -#define USE_NESTED_HOT_ARG(x) -#endif -#endif - // Assume using BGET compare_exchange instruction instead of lock by default. #ifndef USE_CMP_XCHG_FOR_BGET #define USE_CMP_XCHG_FOR_BGET 1 @@ -2913,14 +2902,12 @@ typedef struct kmp_free_list { // sync list) } kmp_free_list_t; #endif -#if KMP_NESTED_HOT_TEAMS // Hot teams array keeps hot teams and their sizes for given thread. 
Hot teams // are not put in teams pool, and they don't put threads in threads pool. typedef struct kmp_hot_team_ptr { kmp_team_p *hot_team; // pointer to hot_team of given nesting level kmp_int32 hot_team_nth; // number of threads allocated for the hot_team } kmp_hot_team_ptr_t; -#endif typedef struct kmp_teams_size { kmp_int32 nteams; // number of teams in a league kmp_int32 nth; // number of threads in each team of the league @@ -2995,9 +2982,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { int th_nt_sev; // error severity for strict modifier const char *th_nt_msg; // error message for strict modifier int th_set_nested_nth_sz; -#if KMP_NESTED_HOT_TEAMS kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ -#endif kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ kmp_teams_size_t @@ -3553,10 +3538,8 @@ extern int __kmp_dflt_max_active_levels; extern bool __kmp_dflt_max_active_levels_set; extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in concurrent execution per team */ -#if KMP_NESTED_HOT_TEAMS extern int __kmp_hot_teams_mode; extern int __kmp_hot_teams_max_level; -#endif #if KMP_MIC_SUPPORTED extern enum mic_type __kmp_mic_type; @@ -4040,16 +4023,16 @@ extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th); extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, int tid); -extern kmp_team_t * -__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, +extern kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, + int max_nproc, #if OMPT_SUPPORT - ompt_data_t ompt_parallel_data, + ompt_data_t ompt_parallel_data, #endif - kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); + kmp_proc_bind_t proc_bind, + kmp_internal_control_t *new_icvs, + int argc, kmp_info_t *thr); extern void __kmp_free_thread(kmp_info_t *); -extern void __kmp_free_team(kmp_root_t *, - kmp_team_t 
*USE_NESTED_HOT_ARG(kmp_info_t *)); +extern void __kmp_free_team(kmp_root_t *, kmp_team_t *, kmp_info_t *); extern kmp_team_t *__kmp_reap_team(kmp_team_t *); /* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake index d64c9a4b557df..40f1087fd7f27 100644 --- a/openmp/runtime/src/kmp_config.h.cmake +++ b/openmp/runtime/src/kmp_config.h.cmake @@ -114,7 +114,6 @@ # define BUILD_I8 1 #endif -#define KMP_NESTED_HOT_TEAMS 1 #define KMP_ADJUST_BLOCKTIME 1 #define BUILD_PARALLEL_ORDERED 1 #define KMP_ASM_INTRINS 1 diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 87c0a66a16c0a..323d13e948b42 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -135,11 +135,9 @@ int __kmp_tp_cached = 0; int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; int __kmp_dflt_max_active_levels = 1; // Nesting off by default bool __kmp_dflt_max_active_levels_set = false; // Don't override set value -#if KMP_NESTED_HOT_TEAMS int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ /* 1 - keep extra threads when reduced */ int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ -#endif enum library_type __kmp_library = library_none; enum sched_type __kmp_sched = kmp_sch_default; /* scheduling method for runtime scheduling */ diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 6afea9b994de4..acc43e8fd92de 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -977,8 +977,7 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, master_th->th.th_team_serialized = FALSE; master_th->th.th_dispatch = &team->t.t_dispatch[0]; -/* make sure we are not the optimized hot team */ -#if KMP_NESTED_HOT_TEAMS + /* make sure we are not the optimized hot team */ use_hot_team = 0; kmp_hot_team_ptr_t 
*hot_teams = master_th->th.th_hot_teams; if (hot_teams) { // hot teams array is not allocated if @@ -1009,9 +1008,6 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, use_hot_team = 0; } } -#else - use_hot_team = team == root->r.r_hot_team; -#endif if (!use_hot_team) { /* install the primary thread */ @@ -1255,13 +1251,12 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - new_team = - __kmp_allocate_team(this_thr->th.th_root, 1, 1, + new_team = __kmp_allocate_team( + this_thr->th.th_root, 1, 1, #if OMPT_SUPPORT - ompt_parallel_data, + ompt_parallel_data, #endif - proc_bind, &this_thr->th.th_current_task->td_icvs, - 0 USE_NESTED_HOT_ARG(NULL)); + proc_bind, &this_thr->th.th_current_task->td_icvs, 0, NULL); __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); KMP_ASSERT(new_team); @@ -1952,9 +1947,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, int level; int active_level; int teams_level; -#if KMP_NESTED_HOT_TEAMS kmp_hot_team_ptr_t **p_hot_teams; -#endif { // KMP_TIME_BLOCK KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); @@ -2012,7 +2005,6 @@ int __kmp_fork_call(ident_t *loc, int gtid, active_level = parent_team->t.t_active_level; // needed to check nesting inside the teams teams_level = master_th->th.th_teams_level; -#if KMP_NESTED_HOT_TEAMS p_hot_teams = &master_th->th.th_hot_teams; if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( @@ -2021,7 +2013,6 @@ int __kmp_fork_call(ident_t *loc, int gtid, // it is either actual or not needed (when active_level > 0) (*p_hot_teams)[0].hot_team_nth = 1; } -#endif #if OMPT_SUPPORT if (ompt_enabled.enabled) { @@ -2200,20 +2191,18 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT ompt_parallel_data, #endif - proc_bind, &new_icvs, - argc USE_NESTED_HOT_ARG(master_th)); + proc_bind, &new_icvs, argc, 
master_th); if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs); } else { /* allocate a new parallel team */ KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); - team = __kmp_allocate_team(root, nthreads, nthreads, + team = __kmp_allocate_team( + root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_data, + ompt_parallel_data, #endif - proc_bind, - &master_th->th.th_current_task->td_icvs, - argc USE_NESTED_HOT_ARG(master_th)); + proc_bind, &master_th->th.th_current_task->td_icvs, argc, master_th); if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &master_th->th.th_current_task->td_icvs); @@ -2699,8 +2688,7 @@ void __kmp_join_call(ident_t *loc, int gtid if (root->r.r_active != master_active) root->r.r_active = master_active; - __kmp_free_team(root, team USE_NESTED_HOT_ARG( - master_th)); // this will free worker threads + __kmp_free_team(root, team, master_th); // this will free worker threads /* this race was fun to find. make sure the following is in the critical region otherwise assertions may fail occasionally since the old team may be @@ -2716,8 +2704,7 @@ void __kmp_join_call(ident_t *loc, int gtid if (parent_team->t.t_serialized && parent_team != master_th->th.th_serial_team && parent_team != root->r.r_root_team) { - __kmp_free_team(root, - master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL)); + __kmp_free_team(root, master_th->th.th_serial_team, NULL); master_th->th.th_serial_team = parent_team; } @@ -2823,11 +2810,8 @@ void __kmp_set_num_threads(int new_nth, int gtid) { // rather than waiting for the next parallel region. 
root = thread->th.th_root; if (__kmp_init_parallel && (!root->r.r_active) && - (root->r.r_hot_team->t.t_nproc > new_nth) -#if KMP_NESTED_HOT_TEAMS - && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode -#endif - ) { + (root->r.r_hot_team->t.t_nproc > new_nth) && __kmp_hot_teams_max_level && + !__kmp_hot_teams_mode) { kmp_team_t *hot_team = root->r.r_hot_team; int f; @@ -2848,12 +2832,10 @@ void __kmp_set_num_threads(int new_nth, int gtid) { hot_team->t.t_threads[f] = NULL; } hot_team->t.t_nproc = new_nth; -#if KMP_NESTED_HOT_TEAMS if (thread->th.th_hot_teams) { KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); thread->th.th_hot_teams[0].hot_team_nth = new_nth; } -#endif if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { hot_team->t.b->update_num_threads(new_nth); @@ -3375,17 +3357,16 @@ static void __kmp_initialize_root(kmp_root_t *root) { /* allocate the root team structure */ KF_TRACE(10, ("__kmp_initialize_root: before root_team\n")); - root_team = - __kmp_allocate_team(root, - 1, // new_nproc - 1, // max_nproc + root_team = __kmp_allocate_team(root, + 1, // new_nproc + 1, // max_nproc #if OMPT_SUPPORT - ompt_data_none, // root parallel id + ompt_data_none, // root parallel id #endif - __kmp_nested_proc_bind.bind_types[0], &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // primary thread is unknown - ); + __kmp_nested_proc_bind.bind_types[0], &r_icvs, + 0, // argc + NULL // primary thread is unknown + ); #if USE_DEBUGGER // Non-NULL value should be assigned to make the debugger display the root // team. 
@@ -3413,17 +3394,16 @@ static void __kmp_initialize_root(kmp_root_t *root) { /* allocate the hot team structure */ KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n")); - hot_team = - __kmp_allocate_team(root, - 1, // new_nproc - __kmp_dflt_team_nth_ub * 2, // max_nproc + hot_team = __kmp_allocate_team(root, + 1, // new_nproc + __kmp_dflt_team_nth_ub * 2, // max_nproc #if OMPT_SUPPORT - ompt_data_none, // root parallel id + ompt_data_none, // root parallel id #endif - __kmp_nested_proc_bind.bind_types[0], &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // primary thread is unknown - ); + __kmp_nested_proc_bind.bind_types[0], &r_icvs, + 0, // argc + NULL // primary thread is unknown + ); KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team)); root->r.r_hot_team = hot_team; @@ -3962,12 +3942,12 @@ int __kmp_register_root(int initial_thread) { if (!root_thread->th.th_serial_team) { kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); - root_thread->th.th_serial_team = __kmp_allocate_team( - root, 1, 1, + root_thread->th.th_serial_team = + __kmp_allocate_team(root, 1, 1, #if OMPT_SUPPORT - ompt_data_none, // root parallel id + ompt_data_none, // root parallel id #endif - proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); + proc_bind_default, &r_icvs, 0, NULL); } KMP_ASSERT(root_thread->th.th_serial_team); KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", @@ -4073,7 +4053,6 @@ int __kmp_register_root(int initial_thread) { return gtid; } -#if KMP_NESTED_HOT_TEAMS static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, const int max_level) { int i, n, nth; @@ -4098,7 +4077,6 @@ static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, __kmp_free_team(root, team, NULL); return n; } -#endif // Resets a root thread and clear its root and hot teams. // Returns the number of __kmp_threads entries directly and indirectly freed. 
@@ -4114,8 +4092,7 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) { root->r.r_hot_team = NULL; // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team // before call to __kmp_free_team(). - __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL)); -#if KMP_NESTED_HOT_TEAMS + __kmp_free_team(root, root_team, NULL); if (__kmp_hot_teams_max_level > 0) { // need to free nested hot teams and their threads if any for (i = 0; i < hot_team->t.t_nproc; ++i) { @@ -4129,8 +4106,7 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) { } } } -#endif - __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL)); + __kmp_free_team(root, hot_team, NULL); // Before we can reap the thread, we need to make certain that all other // threads in the teams that had this root as ancestor have stopped trying to @@ -4437,9 +4413,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid())); KMP_DEBUG_ASSERT(root && team); -#if !KMP_NESTED_HOT_TEAMS - KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid())); -#endif KMP_MB(); /* first, try to get one from the thread pool unless allocating thread is @@ -4614,8 +4587,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, #if OMPT_SUPPORT ompt_data_none, // root parallel id #endif - proc_bind_default, &r_icvs, - 0 USE_NESTED_HOT_ARG(NULL)); + proc_bind_default, &r_icvs, 0, NULL); } KMP_ASSERT(serial_team); serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for @@ -5139,14 +5111,13 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { /* allocate a new team data structure to use. 
take one off of the free pool if available */ -kmp_team_t * -__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, +kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_data_t ompt_parallel_data, + ompt_data_t ompt_parallel_data, #endif - kmp_proc_bind_t new_proc_bind, - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { + kmp_proc_bind_t new_proc_bind, + kmp_internal_control_t *new_icvs, int argc, + kmp_info_t *master) { KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); int f; kmp_team_t *team; @@ -5159,7 +5130,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, KMP_DEBUG_ASSERT(max_nproc >= new_nproc); KMP_MB(); -#if KMP_NESTED_HOT_TEAMS kmp_hot_team_ptr_t *hot_teams; if (master) { team = master->th.th_team; @@ -5193,15 +5163,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, // check we won't access uninitialized hot_teams, just in case KMP_DEBUG_ASSERT(new_nproc == 1); } -#endif // Optimization to use a "hot" team if (use_hot_team && new_nproc > 1) { KMP_DEBUG_ASSERT(new_nproc <= max_nproc); -#if KMP_NESTED_HOT_TEAMS team = hot_teams[level].hot_team; -#else - team = root->r.r_hot_team; -#endif #if KMP_DEBUG if (__kmp_tasking_mode != tskm_immediate_exec) { KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " @@ -5288,20 +5253,17 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, th->th.th_task_team = NULL; } } -#if KMP_NESTED_HOT_TEAMS if (__kmp_hot_teams_mode == 0) { // AC: saved number of threads should correspond to team's value in this // mode, can be bigger in mode 1, when hot team has threads in reserve KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); hot_teams[level].hot_team_nth = new_nproc; -#endif // KMP_NESTED_HOT_TEAMS /* release the extra threads we don't need any more */ for (f = new_nproc; f < team->t.t_nproc; f++) { 
KMP_DEBUG_ASSERT(team->t.t_threads[f]); __kmp_free_thread(team->t.t_threads[f]); team->t.t_threads[f] = NULL; } -#if KMP_NESTED_HOT_TEAMS } // (__kmp_hot_teams_mode == 0) else { // When keeping extra threads in team, switch threads to wait on own @@ -5317,7 +5279,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, } } } -#endif // KMP_NESTED_HOT_TEAMS team->t.t_nproc = new_nproc; // TODO???: team->t.t_max_active_levels = new_max_active_levels; KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched); @@ -5358,7 +5319,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, int old_nproc = team->t.t_nproc; // save old value and use to update only team->t.t_size_changed = 1; -#if KMP_NESTED_HOT_TEAMS int avail_threads = hot_teams[level].hot_team_nth; if (new_nproc < avail_threads) avail_threads = new_nproc; @@ -5386,7 +5346,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, // get reserved threads involved if any. team->t.t_nproc = hot_teams[level].hot_team_nth; hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size -#endif // KMP_NESTED_HOT_TEAMS if (team->t.t_max_nproc < new_nproc) { /* reallocate larger arrays */ __kmp_reallocate_team_arrays(team, new_nproc); @@ -5435,9 +5394,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, /* Restore initial primary thread's affinity mask */ new_temp_affinity.restore(); #endif -#if KMP_NESTED_HOT_TEAMS } // end of check of t_nproc vs. new_nproc vs. hot_team_nth -#endif // KMP_NESTED_HOT_TEAMS if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) { // Barrier size already increased earlier in this function // Activate team threads via th_used_in_team @@ -5484,7 +5441,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, thr->th.th_teams_size = master->th.th_teams_size; } } -#if KMP_NESTED_HOT_TEAMS if (level) { // Sync barrier state for nested hot teams, not needed for outermost hot // team. 
@@ -5501,7 +5457,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, } } } -#endif // KMP_NESTED_HOT_TEAMS /* reallocate space for arguments if necessary */ __kmp_alloc_argv_entries(argc, team, TRUE); @@ -5666,8 +5621,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, /* free the team. return it to the team pool. release all the threads * associated with it */ -void __kmp_free_team(kmp_root_t *root, - kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) { +void __kmp_free_team(kmp_root_t *root, kmp_team_t *team, kmp_info_t *master) { int f; KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id)); @@ -5679,7 +5633,6 @@ void __kmp_free_team(kmp_root_t *root, KMP_DEBUG_ASSERT(team->t.t_threads); int use_hot_team = team == root->r.r_hot_team; -#if KMP_NESTED_HOT_TEAMS int level; if (master) { level = team->t.t_active_level - 1; @@ -5702,7 +5655,6 @@ void __kmp_free_team(kmp_root_t *root, use_hot_team = 1; } } -#endif // KMP_NESTED_HOT_TEAMS /* team is done working */ TCW_SYNC_PTR(team->t.t_pkfn, @@ -5749,9 +5701,7 @@ void __kmp_free_team(kmp_root_t *root, 20, ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id)); -#if KMP_NESTED_HOT_TEAMS __kmp_free_task_team(master, task_team); -#endif team->t.t_task_team[tt_idx] = NULL; } } diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 392a02ebbd9aa..ec617ff870c47 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -1501,7 +1501,6 @@ static void __kmp_stg_print_disp_buffers(kmp_str_buf_t *buffer, __kmp_stg_print_int(buffer, name, __kmp_dispatch_num_buffers); } // __kmp_stg_print_disp_buffers -#if KMP_NESTED_HOT_TEAMS // ----------------------------------------------------------------------------- // KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE @@ -1535,8 +1534,6 @@ static void 
__kmp_stg_print_hot_teams_mode(kmp_str_buf_t *buffer, __kmp_stg_print_int(buffer, name, __kmp_hot_teams_mode); } // __kmp_stg_print_hot_teams_mode -#endif // KMP_NESTED_HOT_TEAMS - // ----------------------------------------------------------------------------- // KMP_HANDLE_SIGNALS @@ -5569,12 +5566,10 @@ static kmp_setting_t __kmp_stg_table[] = { __kmp_stg_print_wait_policy, NULL, 0, 0}, {"KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers, __kmp_stg_print_disp_buffers, NULL, 0, 0}, -#if KMP_NESTED_HOT_TEAMS {"KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, __kmp_stg_print_hot_teams_level, NULL, 0, 0}, {"KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, __kmp_stg_print_hot_teams_mode, NULL, 0, 0}, -#endif // KMP_NESTED_HOT_TEAMS #if KMP_HANDLE_SIGNALS {"KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, @@ -5758,7 +5753,8 @@ static kmp_setting_t __kmp_stg_table[] = { #if OMPX_TASKGRAPH {"KMP_MAX_TDGS", __kmp_stg_parse_max_tdgs, __kmp_std_print_max_tdgs, NULL, 0, 0}, - {"KMP_TDG_DOT", __kmp_stg_parse_tdg_dot, __kmp_stg_print_tdg_dot, NULL, 0, 0}, + {"KMP_TDG_DOT", __kmp_stg_parse_tdg_dot, __kmp_stg_print_tdg_dot, NULL, 0, + 0}, #endif #if OMPT_SUPPORT From openmp-commits at lists.llvm.org Mon Jul 21 14:49:13 2025 From: openmp-commits at lists.llvm.org (Terry Wilmarth via Openmp-commits) Date: Mon, 21 Jul 2025 14:49:13 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] [NFC] Remove KMP_NESTED_HOT_TEAMS macro (PR #143584) In-Reply-To: Message-ID: <687eb5d9.170a0220.feb47.c4ee@mx.google.com> https://github.com/TerryLWilmarth closed https://github.com/llvm/llvm-project/pull/143584 From openmp-commits at lists.llvm.org Mon Jul 21 14:49:48 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Mon, 21 Jul 2025 14:49:48 -0700 (PDT) Subject: [Openmp-commits] [openmp] 4981bc2 - [OpenMP] Fixup bugs found during fuzz testing (#143455) Message-ID: <687eb5fc.170a0220.265935.09e8@mx.google.com> Author: Jonathan Peyton Date: 
2025-07-21T17:49:45-04:00 New Revision: 4981bc24cff3344d477af04591b699da466e10b8 URL: https://github.com/llvm/llvm-project/commit/4981bc24cff3344d477af04591b699da466e10b8 DIFF: https://github.com/llvm/llvm-project/commit/4981bc24cff3344d477af04591b699da466e10b8.diff LOG: [OpenMP] Fixup bugs found during fuzz testing (#143455) A lot of these only trip when using sanitizers with the library. * Insert forgotten free()s * Change (-1) << amount to 0xffffffffu as left shifting a negative is UB * Fixup integer parser to return INT_MAX when parsing huge string of digits. e.g., 452523423423423423 returns INT_MAX * Fixup range parsing for affinity mask so integer overflow does not occur * Don't assert when branch bits are 0, instead warn user that is invalid and use the default value. * Fixup kmp_set_defaults() so the C version only uses null terminated strings and the Fortran version uses the string + size version. * Make sure the KMP_ALIGN_ALLOC is power of two, otherwise use CACHE_LINE. * Disallow ability to set KMP_TASKING=1 (task barrier) this doesn't work and hasn't worked for a long time. * Limit KMP_HOT_TEAMS_MAX_LEVEL to 1024, an array is allocated based on this value. * Remove integer values for OMP_PROC_BIND. The specification only allows strings and CSV of strings. 
* Fix setting KMP_AFFINITY=disabled + OMP_DISPLAY_AFFINITY=TRUE Added: openmp/runtime/test/env/check_certain_values.c openmp/runtime/test/tasking/no_task_barrier.c Modified: openmp/runtime/src/include/omp_lib.F90.var openmp/runtime/src/include/omp_lib.h.var openmp/runtime/src/kmp_affinity.cpp openmp/runtime/src/kmp_barrier.cpp openmp/runtime/src/kmp_barrier.h openmp/runtime/src/kmp_ftn_entry.h openmp/runtime/src/kmp_i18n.cpp openmp/runtime/src/kmp_lock.cpp openmp/runtime/src/kmp_runtime.cpp openmp/runtime/src/kmp_settings.cpp openmp/runtime/src/kmp_str.cpp Removed: ################################################################################ diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var index 20639f60b5d97..90d7e49ebf549 100644 --- a/openmp/runtime/src/include/omp_lib.F90.var +++ b/openmp/runtime/src/include/omp_lib.F90.var @@ -937,9 +937,8 @@ integer (kind=omp_integer_kind), value :: libnum end subroutine kmp_set_library - subroutine kmp_set_defaults(string) bind(c) - use, intrinsic :: iso_c_binding - character (kind=c_char) :: string(*) + subroutine kmp_set_defaults(string) + character (len=*) :: string end subroutine kmp_set_defaults function kmp_get_stacksize() bind(c) diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var index 5793a3ac2e685..a50bb018c7cc3 100644 --- a/openmp/runtime/src/include/omp_lib.h.var +++ b/openmp/runtime/src/include/omp_lib.h.var @@ -1010,8 +1010,8 @@ integer (kind=omp_integer_kind), value :: libnum end subroutine kmp_set_library - subroutine kmp_set_defaults(string) bind(c) - character string(*) + subroutine kmp_set_defaults(string) + character (len=*) :: string end subroutine kmp_set_defaults function kmp_get_stacksize() bind(c) diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index a6065fe792d55..6bfdfbf2d3cdc 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ 
b/openmp/runtime/src/kmp_affinity.cpp @@ -1396,11 +1396,13 @@ bool kmp_topology_t::filter_hw_subset() { // One last check that we shouldn't allow filtering entire machine if (num_filtered == num_hw_threads) { KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered); + KMP_CPU_FREE(filtered_mask); return false; } // Apply the filter restrict_to_mask(filtered_mask); + KMP_CPU_FREE(filtered_mask); return true; } @@ -2225,7 +2227,7 @@ class cpuid_cache_info_t { cache_mask_width = __kmp_cpuid_mask_width(max_threads_sharing); cache_level = __kmp_extract_bits<5, 7>(buf2.eax); table[depth].level = cache_level; - table[depth].mask = ((-1) << cache_mask_width); + table[depth].mask = ((0xffffffffu) << cache_mask_width); depth++; level++; } @@ -2755,13 +2757,13 @@ static bool __kmp_x2apicid_get_levels(int leaf, cpuid_proc_info_t *info, // Set the masks to & with apicid for (unsigned i = 0; i < levels_index; ++i) { if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) { - levels[i].mask = ~((-1) << levels[i].mask_width); - levels[i].cache_mask = (-1) << levels[i].mask_width; + levels[i].mask = ~((0xffffffffu) << levels[i].mask_width); + levels[i].cache_mask = (0xffffffffu) << levels[i].mask_width; for (unsigned j = 0; j < i; ++j) levels[i].mask ^= levels[j].mask; } else { KMP_DEBUG_ASSERT(i > 0); - levels[i].mask = (-1) << levels[i - 1].mask_width; + levels[i].mask = (0xffffffffu) << levels[i - 1].mask_width; levels[i].cache_mask = 0; } info->description.add(info->levels[i].level_type); @@ -4217,6 +4219,9 @@ static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) { if (stride > 0) { do { ADD_MASK_OSID(start, osId2Mask, maxOsId); + // Prevent possible overflow calculation + if (end - start < stride) + break; start += stride; } while (start <= end); } else { @@ -4238,6 +4243,7 @@ static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) { if (nextNewMask == 0) { *out_masks = NULL; KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); + 
KMP_CPU_FREE(sumMask); return; } KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); @@ -4406,6 +4412,7 @@ static void __kmp_process_place(const char **scan, kmp_affinity_t &affinity, (*scan)++; // skip '!' __kmp_process_place(scan, affinity, maxOsId, tempMask, setSize); KMP_CPU_COMPLEMENT(maxOsId, tempMask); + KMP_CPU_AND(tempMask, __kmp_affin_fullMask); } else if ((**scan >= '0') && (**scan <= '9')) { next = *scan; SKIP_DIGITS(next); @@ -4559,6 +4566,8 @@ void __kmp_affinity_process_placelist(kmp_affinity_t &affinity) { *out_numMasks = nextNewMask; if (nextNewMask == 0) { *out_masks = NULL; + KMP_CPU_FREE(tempMask); + KMP_CPU_FREE(previousMask); KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); return; } @@ -5280,13 +5289,18 @@ void __kmp_affinity_uninitialize(void) { if (affinity->os_id_masks != NULL) KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks); if (affinity->proclist != NULL) - __kmp_free(affinity->proclist); + KMP_INTERNAL_FREE(affinity->proclist); if (affinity->ids != NULL) __kmp_free(affinity->ids); if (affinity->attrs != NULL) __kmp_free(affinity->attrs); *affinity = KMP_AFFINITY_INIT(affinity->env_var); } + if (__kmp_affin_fullMask != NULL) { + KMP_CPU_FREE(__kmp_affin_fullMask); + __kmp_affin_fullMask = NULL; + } + __kmp_avail_proc = 0; if (__kmp_affin_origMask != NULL) { if (KMP_AFFINITY_CAPABLE()) { #if KMP_OS_AIX diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index d7ef57c608149..88a5cbb69ba87 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -205,6 +205,31 @@ void distributedBarrier::init(size_t nthr) { team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t)); } +void distributedBarrier::deallocate(distributedBarrier *db) { + for (int i = 0; i < MAX_ITERS; ++i) { + if (db->flags[i]) + KMP_INTERNAL_FREE(db->flags[i]); + db->flags[i] = NULL; + } + if (db->go) { + KMP_INTERNAL_FREE(db->go); + db->go = NULL; + } + if (db->iter) { + 
KMP_INTERNAL_FREE(db->iter); + db->iter = NULL; + } + if (db->sleep) { + KMP_INTERNAL_FREE(db->sleep); + db->sleep = NULL; + } + if (db->team_icvs) { + __kmp_free(db->team_icvs); + db->team_icvs = NULL; + } + KMP_ALIGNED_FREE(db); +} + // This function is used only when KMP_BLOCKTIME is not infinite. // static void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team, @@ -1890,8 +1915,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, break; } case bp_hyper_bar: { - // don't set branch bits to 0; use linear - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); break; @@ -1902,8 +1925,6 @@ static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split, break; } case bp_tree_bar: { - // don't set branch bits to 0; use linear - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj)); break; @@ -2297,7 +2318,6 @@ void __kmp_join_barrier(int gtid) { break; } case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL USE_ITT_BUILD_ARG(itt_sync_obj)); break; @@ -2308,7 +2328,6 @@ void __kmp_join_barrier(int gtid) { break; } case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL USE_ITT_BUILD_ARG(itt_sync_obj)); break; diff --git a/openmp/runtime/src/kmp_barrier.h b/openmp/runtime/src/kmp_barrier.h index ae9b8d62f4c3d..ce6100acc008e 100644 --- a/openmp/runtime/src/kmp_barrier.h +++ b/openmp/runtime/src/kmp_barrier.h @@ -130,8 +130,7 @@ class distributedBarrier { d->init(nThreads); return d; } - - static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); } + static void deallocate(distributedBarrier *db); void 
update_num_threads(size_t nthr) { init(nthr); } diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 59a9571d59534..2b0063eb23a0a 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -572,16 +572,14 @@ static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size, // Convert a Fortran string to a C string by adding null byte class ConvertedString { char *buf; - kmp_info_t *th; public: ConvertedString(char const *fortran_str, size_t size) { - th = __kmp_get_thread(); - buf = (char *)__kmp_thread_malloc(th, size + 1); + buf = (char *)KMP_INTERNAL_MALLOC(size + 1); KMP_STRNCPY_S(buf, size + 1, fortran_str, size); buf[size] = '\0'; } - ~ConvertedString() { __kmp_thread_free(th, buf); } + ~ConvertedString() { KMP_INTERNAL_FREE(buf); } const char *get() const { return buf; } }; #endif // KMP_STUB @@ -1495,10 +1493,18 @@ void FTN_STDCALL FTN_SET_DEFAULTS(char const *str #endif ) { #ifndef KMP_STUB + size_t sz; + char const *defaults = str; + #ifdef PASS_ARGS_BY_VALUE - int len = (int)KMP_STRLEN(str); + sz = KMP_STRLEN(str); +#else + sz = (size_t)len; + ConvertedString cstr(str, sz); + defaults = cstr.get(); #endif - __kmp_aux_set_defaults(str, len); + + __kmp_aux_set_defaults(defaults, sz); #endif } diff --git a/openmp/runtime/src/kmp_i18n.cpp b/openmp/runtime/src/kmp_i18n.cpp index a164aa180dd48..f93e2b9f9592f 100644 --- a/openmp/runtime/src/kmp_i18n.cpp +++ b/openmp/runtime/src/kmp_i18n.cpp @@ -791,8 +791,19 @@ void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, va_list args) { kmp_msg_t fmsg; // formatted message kmp_str_buf_t buffer; - if (severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off) + if (severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off) { + // Have to free all possible pre-allocated messages + // sent in through message and args + __kmp_str_free(&message.str); + for (;;) { + message = va_arg(args, kmp_msg_t); + if 
(message.type == kmp_mt_dummy && message.str == NULL) { + break; + } + __kmp_str_free(&message.str); + } return; // no reason to form a string in order to not print it + } __kmp_str_buf_init(&buffer); diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index 47dcc806e20d1..fd1300352e95b 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -3453,6 +3453,7 @@ void __kmp_cleanup_indirect_user_locks() { } __kmp_free(ptr->table[row]); } + __kmp_free(ptr->table); kmp_indirect_lock_table_t *next_table = ptr->next_table; if (ptr != &__kmp_i_lock_table) __kmp_free(ptr); diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index acc43e8fd92de..39b7834d358af 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8281,6 +8281,7 @@ void __kmp_cleanup(void) { __kmp_free(ptr); ptr = next; } + __kmp_old_threads_list = NULL; #if KMP_USE_DYNAMIC_LOCK __kmp_cleanup_indirect_user_locks(); @@ -8288,7 +8289,7 @@ void __kmp_cleanup(void) { __kmp_cleanup_user_locks(); #endif #if OMPD_SUPPORT - if (ompd_state) { + if (ompd_env_block) { __kmp_free(ompd_env_block); ompd_env_block = NULL; ompd_env_block_size = 0; @@ -8314,6 +8315,8 @@ void __kmp_cleanup(void) { __kmp_nested_proc_bind.bind_types = NULL; __kmp_nested_proc_bind.size = 0; __kmp_nested_proc_bind.used = 0; + __kmp_dflt_team_nth = 0; + __kmp_dflt_team_nth_ub = 0; if (__kmp_affinity_format) { KMP_INTERNAL_FREE(__kmp_affinity_format); __kmp_affinity_format = NULL; @@ -8321,6 +8324,9 @@ void __kmp_cleanup(void) { __kmp_i18n_catclose(); + if (__kmp_nesting_nth_level) + KMP_INTERNAL_FREE(__kmp_nesting_nth_level); + #if KMP_USE_HIER_SCHED __kmp_hier_scheds.deallocate(); #endif @@ -8329,6 +8335,9 @@ void __kmp_cleanup(void) { __kmp_stats_fini(); #endif + __kmpc_destroy_allocator(KMP_GTID_SHUTDOWN, __kmp_def_allocator); + __kmp_def_allocator = omp_default_mem_alloc; + KA_TRACE(10, ("__kmp_cleanup: exit\n")); } @@ 
-8724,11 +8733,15 @@ static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, break; #if KMP_AFFINITY_SUPPORTED case 'A': { - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask); - rc = __kmp_str_buf_print(field_buffer, format, buf.str); - __kmp_str_buf_free(&buf); + if (th->th.th_affin_mask) { + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask); + rc = __kmp_str_buf_print(field_buffer, format, buf.str); + __kmp_str_buf_free(&buf); + } else { + rc = __kmp_str_buf_print(field_buffer, "%s", "disabled"); + } } break; #endif default: diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index ec617ff870c47..31342c8c6203d 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -1158,7 +1158,6 @@ static void __kmp_parse_nested_num_threads(const char *var, const char *env, } if (!__kmp_dflt_max_active_levels_set && total > 1) __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; - KMP_DEBUG_ASSERT(total > 0); if (total <= 0) { KMP_WARNING(NthSyntaxError, var, env); return; @@ -1248,8 +1247,11 @@ static void __kmp_stg_parse_num_threads(char const *name, char const *value, // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers! 
if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { // The array of 1 element - __kmp_nested_nth.nth = (int *)KMP_INTERNAL_MALLOC(sizeof(int)); - __kmp_nested_nth.size = __kmp_nested_nth.used = 1; + if (!__kmp_nested_nth.nth) { + __kmp_nested_nth.nth = (int *)KMP_INTERNAL_MALLOC(sizeof(int)); + __kmp_nested_nth.size = 1; + } + __kmp_nested_nth.used = 1; __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_xproc; } else { @@ -1361,6 +1363,11 @@ static void __kmp_stg_parse_tasking(char const *name, char const *value, void *data) { __kmp_stg_parse_int(name, value, 0, (int)tskm_max, (int *)&__kmp_tasking_mode); + // KMP_TASKING=1 (task barrier) doesn't work anymore, change to task_teams (2) + if (__kmp_tasking_mode == tskm_extra_barrier) { + KMP_WARNING(StgInvalidValue, name, value); + __kmp_tasking_mode = tskm_task_teams; + } } // __kmp_stg_parse_tasking static void __kmp_stg_print_tasking(kmp_str_buf_t *buffer, char const *name, @@ -1510,8 +1517,8 @@ static void __kmp_stg_parse_hot_teams_level(char const *name, char const *value, KMP_WARNING(EnvParallelWarn, name); return; } // read value before first parallel only - __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, - &__kmp_hot_teams_max_level); + __kmp_stg_parse_int(name, value, 0, 1024, &__kmp_hot_teams_max_level); + } // __kmp_stg_parse_hot_teams_level static void __kmp_stg_print_hot_teams_level(kmp_str_buf_t *buffer, @@ -1675,6 +1682,11 @@ static void __kmp_stg_parse_align_alloc(char const *name, char const *value, void *data) { __kmp_stg_parse_size(name, value, CACHE_LINE, INT_MAX, NULL, &__kmp_align_alloc, 1); + // Must be power of 2 + if (__kmp_align_alloc == 0 || ((__kmp_align_alloc - 1) & __kmp_align_alloc)) { + KMP_WARNING(StgInvalidValue, name, value); + __kmp_align_alloc = CACHE_LINE; + } } // __kmp_stg_parse_align_alloc static void __kmp_stg_print_align_alloc(kmp_str_buf_t *buffer, char const *name, @@ -1707,15 +1719,16 @@ static void 
__kmp_stg_parse_barrier_branch_bit(char const *name, } else { __kmp_barrier_release_branch_bits[i] = (kmp_uint32)__kmp_str_to_int(comma + 1, 0); - - if (__kmp_barrier_release_branch_bits[i] > KMP_MAX_BRANCH_BITS) { + if (__kmp_barrier_release_branch_bits[i] == 0 || + __kmp_barrier_release_branch_bits[i] > KMP_MAX_BRANCH_BITS) { __kmp_msg(kmp_ms_warning, KMP_MSG(BarrReleaseValueInvalid, name, comma + 1), __kmp_msg_null); __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; } } - if (__kmp_barrier_gather_branch_bits[i] > KMP_MAX_BRANCH_BITS) { + if (__kmp_barrier_gather_branch_bits[i] == 0 || + __kmp_barrier_gather_branch_bits[i] > KMP_MAX_BRANCH_BITS) { KMP_WARNING(BarrGatherValueInvalid, name, value); KMP_INFORM(Using_uint_Value, name, __kmp_barrier_gather_bb_dflt); __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; @@ -2195,7 +2208,7 @@ static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env, { ptr diff _t len = next - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); + char *retlist = (char *)KMP_INTERNAL_MALLOC((len + 1) * sizeof(char)); KMP_MEMCPY_S(retlist, (len + 1) * sizeof(char), env, len * sizeof(char)); retlist[len] = '\0'; *proclist = retlist; @@ -3013,7 +3026,7 @@ static int __kmp_parse_place_list(const char *var, const char *env, { ptr diff _t len = scan - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); + char *retlist = (char *)KMP_INTERNAL_MALLOC((len + 1) * sizeof(char)); KMP_MEMCPY_S(retlist, (len + 1) * sizeof(char), env, len * sizeof(char)); retlist[len] = '\0'; *place_list = retlist; @@ -3483,18 +3496,8 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value, const char *buf = value; const char *next; - int num; + SKIP_WS(buf); - if ((*buf >= '0') && (*buf <= '9')) { - next = buf; - SKIP_DIGITS(next); - num = __kmp_str_to_int(buf, *next); - KMP_ASSERT(num >= 0); - buf = next; - SKIP_WS(buf); - } else { - num = -1; - } next 
= buf; if (__kmp_match_str("disabled", buf, &next)) { @@ -3505,8 +3508,7 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value, #endif /* KMP_AFFINITY_SUPPORTED */ __kmp_nested_proc_bind.used = 1; __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } else if ((num == (int)proc_bind_false) || - __kmp_match_str("false", buf, &next)) { + } else if (__kmp_match_str("false", buf, &next)) { buf = next; SKIP_WS(buf); #if KMP_AFFINITY_SUPPORTED @@ -3514,8 +3516,7 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value, #endif /* KMP_AFFINITY_SUPPORTED */ __kmp_nested_proc_bind.used = 1; __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } else if ((num == (int)proc_bind_true) || - __kmp_match_str("true", buf, &next)) { + } else if (__kmp_match_str("true", buf, &next)) { buf = next; SKIP_WS(buf); __kmp_nested_proc_bind.used = 1; @@ -3551,19 +3552,16 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value, for (;;) { enum kmp_proc_bind_t bind; - if ((num == (int)proc_bind_primary) || - __kmp_match_str("master", buf, &next) || + if (__kmp_match_str("master", buf, &next) || __kmp_match_str("primary", buf, &next)) { buf = next; SKIP_WS(buf); bind = proc_bind_primary; - } else if ((num == (int)proc_bind_close) || - __kmp_match_str("close", buf, &next)) { + } else if (__kmp_match_str("close", buf, &next)) { buf = next; SKIP_WS(buf); bind = proc_bind_close; - } else if ((num == (int)proc_bind_spread) || - __kmp_match_str("spread", buf, &next)) { + } else if (__kmp_match_str("spread", buf, &next)) { buf = next; SKIP_WS(buf); bind = proc_bind_spread; @@ -3578,21 +3576,13 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value, if (i >= nelem) { break; } - KMP_DEBUG_ASSERT(*buf == ','); + if (*buf != ',') { + KMP_WARNING(ParseExtraCharsWarn, name, buf); + while (*buf != ',') + buf++; + } buf++; SKIP_WS(buf); - - // Read next value if it was specified as an integer - if ((*buf >= '0') && 
(*buf <= '9')) { - next = buf; - SKIP_DIGITS(next); - num = __kmp_str_to_int(buf, *next); - KMP_ASSERT(num >= 0); - buf = next; - SKIP_WS(buf); - } else { - num = -1; - } } SKIP_WS(buf); } @@ -4533,6 +4523,10 @@ static void __kmp_stg_print_atomic_mode(kmp_str_buf_t *buffer, char const *name, static void __kmp_stg_parse_consistency_check(char const *name, char const *value, void *data) { + if (TCR_4(__kmp_init_serial)) { + KMP_WARNING(EnvSerialWarn, name); + return; + } // read value before serial initialization only if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { // Note, this will not work from kmp_set_defaults because th_cons stack was // not allocated @@ -4899,7 +4893,6 @@ static void __kmp_stg_parse_spin_backoff_params(const char *name, } } } - KMP_DEBUG_ASSERT(total > 0); if (total <= 0) { KMP_WARNING(EnvSyntaxError, name, value); return; @@ -4995,7 +4988,6 @@ static void __kmp_stg_parse_adaptive_lock_props(const char *name, } } } - KMP_DEBUG_ASSERT(total > 0); if (total <= 0) { KMP_WARNING(EnvSyntaxError, name, value); return; diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp index 6ee2df724487c..12cce53074821 100644 --- a/openmp/runtime/src/kmp_str.cpp +++ b/openmp/runtime/src/kmp_str.cpp @@ -628,6 +628,11 @@ int __kmp_basic_str_to_int(char const *str) { for (t = str; *t != '\0'; ++t) { if (*t < '0' || *t > '9') break; + // Cap parsing to create largest integer + if (result >= (INT_MAX - (*t - '0')) / 10) { + result = INT_MAX; + break; + } result = (result * 10) + (*t - '0'); } @@ -643,9 +648,20 @@ int __kmp_str_to_int(char const *str, char sentinel) { for (t = str; *t != '\0'; ++t) { if (*t < '0' || *t > '9') break; + // Cap parsing to create largest integer + if (result >= (INT_MAX - (*t - '0')) / 10) { + result = INT_MAX; + break; + } result = (result * 10) + (*t - '0'); } + // Parse rest of large number by skipping the digits so t points to sentinel + if (result == INT_MAX) + for (t = str; *t != '\0'; ++t) + if (*t < 
'0' || *t > '9') + break; + switch (*t) { case '\0': /* the current default for no suffix is bytes */ factor = 1; diff --git a/openmp/runtime/test/env/check_certain_values.c b/openmp/runtime/test/env/check_certain_values.c new file mode 100644 index 0000000000000..6d2623749ab0a --- /dev/null +++ b/openmp/runtime/test/env/check_certain_values.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile +// RUN: env KMP_FORKJOIN_BARRIER=0,0 %libomp-run +// RUN: env KMP_PLAIN_BARRIER=0,0 %libomp-run +// RUN: env KMP_REDUCTION_BARRIER=0,0 %libomp-run +// RUN: env KMP_ALIGN_ALLOC=7 %libomp-run +// RUN: env KMP_ALIGN_ALLOC=8 %libomp-run +// RUN: env KMP_AFFINITY='explicit,proclist=[0-1222333333333444444]' %libomp-run +// RUN: env KMP_AFFINITY=disabled OMP_DISPLAY_AFFINITY=TRUE %libomp-run +// +// Test that certain environment variable values do not crash the runtime. +#include +#include + +int a = 0; + +int test() { +#pragma omp parallel reduction(+ : a) + { + a += omp_get_thread_num(); + } + if (a == 0) { + // If the test passes, 'a' should not be zero + // because we are using reduction on thread numbers. 
+ return 0; + } + return 1; +} + +int main(int argc, char **argv) { + int status = EXIT_SUCCESS; + if (!test()) { + status = EXIT_FAILURE; + } + return status; +} diff --git a/openmp/runtime/test/tasking/no_task_barrier.c b/openmp/runtime/test/tasking/no_task_barrier.c new file mode 100644 index 0000000000000..da2e99ee408f7 --- /dev/null +++ b/openmp/runtime/test/tasking/no_task_barrier.c @@ -0,0 +1,28 @@ +// RUN: %libomp-compile +// RUN: env KMP_TASKING=0 %libomp-run +// RUN: env KMP_TASKING=1 %libomp-run +// RUN: env KMP_TASKING=2 %libomp-run +// +// Test to make sure the KMP_TASKING=1 option doesn't crash +// Can use KMP_TASKING=0 (immediate exec) or 2 (defer to task queue +// and steal during regular barrier) but cannot use +// KMP_TASKING=1 (explicit tasking barrier before regular barrier) +#include +#include +#include +int main() { + int i; +#pragma omp parallel + { +#pragma omp single + { + for (i = 0; i < 10; i++) { +#pragma omp task + { + printf("Task %d executed by thread %d\n", i, omp_get_thread_num()); + } + } + } + } + return EXIT_SUCCESS; +} From openmp-commits at lists.llvm.org Mon Jul 21 14:49:52 2025 From: openmp-commits at lists.llvm.org (Terry Wilmarth via Openmp-commits) Date: Mon, 21 Jul 2025 14:49:52 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Fixup bugs found during fuzz testing (PR #143455) In-Reply-To: Message-ID: <687eb600.050a0220.2e9a50.fe3f@mx.google.com> https://github.com/TerryLWilmarth closed https://github.com/llvm/llvm-project/pull/143455 From openmp-commits at lists.llvm.org Mon Jul 21 16:53:50 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:53:50 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed30e.170a0220.282d23.0a01@mx.google.com> ================ @@ -6,8 +6,8 @@ !----------------------------------------- ! 
FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty ---------------- Meinersbur wrote: Some additional notes: `%flang_bare` actually is the driver (i.e. not `flang -fc1`; I have to change the comments). The problem is that the intrinsic modules may not be in the resource directory, but in the directory defined by `FLANG_INTRINSIC_MODULES_DIR`, as otherwise it would not be possible to run check-flang tests in a Flang-standalone build (that does not build flang-rt, hence no intrinsic modules). `FLANG_INTRINSIC_MODULES_DIR` is implemented by replacing `flang` with `flang -fintrinsic-modules-path ${FLANG_INTRINSIC_MODULES_DIR}`, i.e. it always comes first in the command line. This means it will always have priority over `-fintrinsic-modules-path %S/Inputs/`. This is incompatible with the second RUN line that depends on the files in `%S/Inputs/` being picked up. Hence, this test adds `-fintrinsic-modules-path ${FLANG_INTRINSIC_MODULES_DIR}` (if necessary) after `-fintrinsic-modules-path %S/Inputs/`, since with a `%flang` substitution it is not possible to put it behind all other flags. `%flang_bare` prevents `-fintrinsic-modules-path ${FLANG_INTRINSIC_MODULES_DIR}` from (also) being added before `%S/Inputs/`. One could also redesign this test, i.e. so that it expects the default intrinsic modules to be matched, not the ones in `%S/Inputs/`. 
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:54:14 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:54:14 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed326.170a0220.1affc.144e@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:54:59 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:54:59 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed353.170a0220.37699d.06a4@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:55:32 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:55:32 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed374.050a0220.23e52f.0733@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:56:19 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:56:19 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed3a3.050a0220.1634f6.fa9d@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:57:05 2025 From: 
openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:57:05 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed3d1.170a0220.1833f5.0646@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:57:59 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:57:59 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed407.630a0220.31eae0.f556@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:58:39 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:58:39 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed42f.170a0220.3082ea.01ab@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Mon Jul 21 16:59:13 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Mon, 21 Jul 2025 16:59:13 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687ed451.170a0220.40ba3.0082@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Tue Jul 22 03:12:39 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:12:39 -0700 (PDT) Subject: [Openmp-commits] 
[openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687f6417.170a0220.6d2ef.ec80@mx.google.com> https://github.com/Shruti05-MS updated https://github.com/llvm/llvm-project/pull/149987 >From c9b469ae4b9a9c1c2c3d0f1411eeb4d5ba1403de Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:12:36 +0530 Subject: [PATCH 1/2] Create test.c --- openmp/test.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 openmp/test.c diff --git a/openmp/test.c b/openmp/test.c new file mode 100644 index 0000000000000..51b7330748985 --- /dev/null +++ b/openmp/test.c @@ -0,0 +1,12 @@ +#include +#include + +int main() { + int x = 0; + #pragma omp parallel for + for (int i = 0; i < 10; i++) { + x += i; + } + printf("x = %d\n", x); + return 0; +} >From 395c5eb101cab0700ec7830f1a17ba2badc1e0df Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:42:31 +0530 Subject: [PATCH 2/2] Update test.c --- openmp/test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openmp/test.c b/openmp/test.c index 51b7330748985..023f16912a4a4 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,3 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } +// Updated to trigger bot + From openmp-commits at lists.llvm.org Tue Jul 22 03:33:36 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:33:36 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687f6900.a70a0220.2811e3.1a9d@mx.google.com> https://github.com/Shruti05-MS updated https://github.com/llvm/llvm-project/pull/149987 >From c9b469ae4b9a9c1c2c3d0f1411eeb4d5ba1403de Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:12:36 +0530 Subject: [PATCH 1/3] Create test.c --- openmp/test.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 openmp/test.c diff --git a/openmp/test.c b/openmp/test.c new file mode 100644 index 
0000000000000..51b7330748985 --- /dev/null +++ b/openmp/test.c @@ -0,0 +1,12 @@ +#include +#include + +int main() { + int x = 0; + #pragma omp parallel for + for (int i = 0; i < 10; i++) { + x += i; + } + printf("x = %d\n", x); + return 0; +} >From 395c5eb101cab0700ec7830f1a17ba2badc1e0df Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:42:31 +0530 Subject: [PATCH 2/3] Update test.c --- openmp/test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openmp/test.c b/openmp/test.c index 51b7330748985..023f16912a4a4 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,3 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } +// Updated to trigger bot + >From b4faa6d4e4a83be26f7951ebd684e27d3e72bd52 Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 16:03:27 +0530 Subject: [PATCH 3/3] Update test.c --- openmp/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/test.c b/openmp/test.c index 023f16912a4a4..64a495e6cecd3 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,5 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } -// Updated to trigger bot +// trigger to bot From openmp-commits at lists.llvm.org Tue Jul 22 03:39:22 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:39:22 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687f6a5a.630a0220.ab58a.1405@mx.google.com> https://github.com/Shruti05-MS updated https://github.com/llvm/llvm-project/pull/149987 >From c9b469ae4b9a9c1c2c3d0f1411eeb4d5ba1403de Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:12:36 +0530 Subject: [PATCH 1/4] Create test.c --- openmp/test.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 openmp/test.c diff --git a/openmp/test.c b/openmp/test.c new file mode 100644 index 0000000000000..51b7330748985 --- /dev/null +++ b/openmp/test.c @@ -0,0 +1,12 @@ +#include +#include + 
+int main() { + int x = 0; + #pragma omp parallel for + for (int i = 0; i < 10; i++) { + x += i; + } + printf("x = %d\n", x); + return 0; +} >From 395c5eb101cab0700ec7830f1a17ba2badc1e0df Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:42:31 +0530 Subject: [PATCH 2/4] Update test.c --- openmp/test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openmp/test.c b/openmp/test.c index 51b7330748985..023f16912a4a4 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,3 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } +// Updated to trigger bot + >From b4faa6d4e4a83be26f7951ebd684e27d3e72bd52 Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 16:03:27 +0530 Subject: [PATCH 3/4] Update test.c --- openmp/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/test.c b/openmp/test.c index 023f16912a4a4..64a495e6cecd3 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,5 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } -// Updated to trigger bot +// trigger to bot >From 98a2e1fad5685dd9623f73e9bfa1f8236dfe18eb Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 16:09:13 +0530 Subject: [PATCH 4/4] Update test.c From openmp-commits at lists.llvm.org Tue Jul 22 03:51:26 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:51:26 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687f6d2e.170a0220.19cf55.31c6@mx.google.com> https://github.com/Shruti05-MS updated https://github.com/llvm/llvm-project/pull/149987 >From c9b469ae4b9a9c1c2c3d0f1411eeb4d5ba1403de Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:12:36 +0530 Subject: [PATCH 1/5] Create test.c --- openmp/test.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 openmp/test.c diff --git a/openmp/test.c b/openmp/test.c new file mode 100644 index 0000000000000..51b7330748985 --- 
/dev/null +++ b/openmp/test.c @@ -0,0 +1,12 @@ +#include +#include + +int main() { + int x = 0; + #pragma omp parallel for + for (int i = 0; i < 10; i++) { + x += i; + } + printf("x = %d\n", x); + return 0; +} >From 395c5eb101cab0700ec7830f1a17ba2badc1e0df Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 15:42:31 +0530 Subject: [PATCH 2/5] Update test.c --- openmp/test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openmp/test.c b/openmp/test.c index 51b7330748985..023f16912a4a4 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,3 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } +// Updated to trigger bot + >From b4faa6d4e4a83be26f7951ebd684e27d3e72bd52 Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 16:03:27 +0530 Subject: [PATCH 3/5] Update test.c --- openmp/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/test.c b/openmp/test.c index 023f16912a4a4..64a495e6cecd3 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,5 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } -// Updated to trigger bot +// trigger to bot >From 98a2e1fad5685dd9623f73e9bfa1f8236dfe18eb Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 16:09:13 +0530 Subject: [PATCH 4/5] Update test.c >From 1d9e0ae1cdd75886e61b2c77ea4084bb696c9e6e Mon Sep 17 00:00:00 2001 From: Shruti05-MS Date: Tue, 22 Jul 2025 16:14:08 +0530 Subject: [PATCH 5/5] Update test.c --- openmp/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/test.c b/openmp/test.c index 64a495e6cecd3..1e8e6b865bfab 100644 --- a/openmp/test.c +++ b/openmp/test.c @@ -10,5 +10,5 @@ int main() { printf("x = %d\n", x); return 0; } -// trigger to bot +// re-triggering the AI bot From openmp-commits at lists.llvm.org Tue Jul 22 03:51:32 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:51:32 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR 
#149987) In-Reply-To: Message-ID: <687f6d34.170a0220.240652.275d@mx.google.com> Shruti05-MS wrote: updated https://github.com/llvm/llvm-project/pull/149987 From openmp-commits at lists.llvm.org Tue Jul 22 03:53:08 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:53:08 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687f6d94.170a0220.282d23.2a3d@mx.google.com> https://github.com/Shruti05-MS closed https://github.com/llvm/llvm-project/pull/149987 From openmp-commits at lists.llvm.org Tue Jul 22 03:53:22 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 03:53:22 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687f6da2.170a0220.135a95.21fb@mx.google.com> https://github.com/Shruti05-MS reopened https://github.com/llvm/llvm-project/pull/149987 From openmp-commits at lists.llvm.org Tue Jul 22 07:44:39 2025 From: openmp-commits at lists.llvm.org (Peter Klausler via Openmp-commits) Date: Tue, 22 Jul 2025 07:44:39 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687fa3d7.170a0220.a991.9e95@mx.google.com> ================ @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty ---------------- klausler wrote: I don't understand how flang tests that depend on fortran_type_info.mod are going to work. 
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Tue Jul 22 07:52:57 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 07:52:57 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687fa5c9.170a0220.1113e.99a7@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 01/14] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- 
flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename 
{flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. 
llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs 
must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. -get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt<std::string> moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include <cfloat> - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 02/14] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 03/14] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 04/14] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 05/14] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 06/14] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 07/14] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 08/14] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git 
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 8ec6d03939c3a7e2f79bb3ea5911b7607028dd7c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 21:52:02 +0200 Subject: [PATCH 09/14] Also add dependency barrier for WIN32 --- flang-rt/lib/runtime/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 1b8114c102205..20f5d84bb2b69 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -281,14 +281,18 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") - add_flangrt_library(${name}.intrinsics OBJECT + add_flangrt_library(${name}.intrinsics.obj OBJECT 
${intrinsics_sources} ) + add_custom_target(${name}.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) add_flangrt_library(${name} ${libtype} - ${sources} $ + ${sources} $ ${ARGN} - LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -311,8 +315,9 @@ else() endif () get_target_property(compile_target ${name}.compile ALIASED_TARGET) - flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) + add_dependencies(${compile_target} ${name}.intrinsics) add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") >From 580a0c56535a9cd1c65fd5a5e6309b73c36ae8b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:05:34 +0200 Subject: [PATCH 10/14] Dependency barrier test --- flang-rt/lib/runtime/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 20f5d84bb2b69..bdd4318832473 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,6 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics + COMMAND echo "Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) @@ -295,6 +296,7 @@ else() LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) + get_target_property(compile_target ${name}.compile ALIASED_TARGET) if (msvc_lib) set_target_properties(${name} @@ -311,14 +313,13 @@ else() set(is_public "") else () set(is_public PUBLIC) + add_dependencies(flang-rt-mod ${name}.intrinsics 
${compile_target}) set(_has_public_intrinsics "YES" PARENT_SCOPE) endif () - get_target_property(compile_target ${name}.compile ALIASED_TARGET) flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) add_dependencies(${compile_target} ${name}.intrinsics) - add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") >From 979691a5bba4888be8c7c82d1bed4e8cdc71fff9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:06:40 +0200 Subject: [PATCH 11/14] Dependency barrier info --- flang-rt/lib/runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index bdd4318832473..24b84b11f9513 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,7 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics - COMMAND echo "Dependency barrier" + COMMAND echo "${name} Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) >From b4adeab58e615b9059c0a6e5bbcb376d1fe21bb2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 21 Jul 2025 21:56:26 +0200 Subject: [PATCH 12/14] Avoid unrelated changes --- clang/include/clang/Driver/Driver.h | 1 + flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 9343fed36b6ac..14e1e644e51aa 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,6 +403,7 @@ class Driver { SmallString<128> &CrashDiagDir); public: + /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. 
/// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 688b5aacc4bcd..609ba27bc212b 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1376,10 +1376,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError( - loc, llvm::Twine("runtime derived type info descriptor of '") + name + - "' was not generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError(loc, + "runtime derived type info descriptor was not " + "generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition >From c40f43c98ff7ddbcb52c1ac35210320926369b2c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 14:16:19 +0200 Subject: [PATCH 13/14] Use -fintrinsic-modules-path= --- flang/test/lit.cfg.py | 20 +++----------------- runtimes/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index b05eba8da0b0c..bbf9a5a9f277f 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -147,26 +147,18 @@ def get_resource_module_intrinsic_dir(): intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] + intrinsics_search_args += [f"-fintrinsic-modules-path={flang_intrinsics_dir}"] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: extra_intrinsics_search_args 
+= [ - "-fintrinsic-modules-path", - config.flang_intrinsic_modules_dir, + f"-fintrinsic-modules-path={config.flang_intrinsic_modules_dir}", ] lit_config.note( f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" ) -config.substitutions.append( - ( - "%intrinsic_module_flags", - " ".join(intrinsics_search_args + extra_intrinsics_search_args), - ) -) - # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ @@ -193,13 +185,7 @@ def get_resource_module_intrinsic_dir(): "%bbc_bare", command=FindTool("bbc"), unresolved="fatal", - ), - ToolSubst( - "%flang_bare", - command=FindTool("flang"), - extra_args=isysroot_flag, - unresolved="fatal", - ), + ) ] # Flang has several unimplemented features. TODO messages are used to mark diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 51214a46f558e..0832767505fe1 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -454,7 +454,7 @@ function (flang_module_target tgtname) # Let it find the other public module files target_compile_options(${tgtname} PRIVATE - "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + "$<$:-fintrinsic-modules-path=${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" ) if (ARG_PUBLIC) >From 18422d4d6ea24aeb29a4fece11ba7b509fbd71d6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 16:50:58 +0200 Subject: [PATCH 14/14] Rework intrinsic-module-path.f90 test --- flang/test/Driver/Inputs/ieee_arithmetic.mod | 1 + flang/test/Driver/Inputs/iso_fortran_env.mod | 1 + flang/test/Driver/intrinsic-module-path.f90 | 54 ++++++++++++++++---- flang/test/lit.cfg.py | 2 +- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/flang/test/Driver/Inputs/ieee_arithmetic.mod b/flang/test/Driver/Inputs/ieee_arithmetic.mod index 30fd57801970b..451d1af62a941 100644 --- a/flang/test/Driver/Inputs/ieee_arithmetic.mod +++ b/flang/test/Driver/Inputs/ieee_arithmetic.mod @@ 
-1,5 +1,6 @@ ! DUMMY module ! Added for testing purposes. The contents of this file are currently not relevant. +! Using this file will cause an error because of a missing checksum module ieee_arithmetic type::ieee_round_type integer(1),private::mode=0_1 diff --git a/flang/test/Driver/Inputs/iso_fortran_env.mod b/flang/test/Driver/Inputs/iso_fortran_env.mod index 689297d52027b..ad501c2d9c1b8 100644 --- a/flang/test/Driver/Inputs/iso_fortran_env.mod +++ b/flang/test/Driver/Inputs/iso_fortran_env.mod @@ -1,5 +1,6 @@ ! DUMMY module ! Added for testing purposes. The contents of this file are currently not relevant. +! Using this file will cause an error because of a missing checksum module iso_fortran_env use __fortran_builtins,only:event_type=>__builtin_event_type use __fortran_builtins,only:lock_type=>__builtin_lock_type diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 3e696ce5d08e0..3317eb776f0a1 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -1,23 +1,55 @@ ! Ensure argument -fintrinsic-modules-path works as expected. -! WITHOUT the option, the default location for the module is checked and no error generated. -! With the option GIVEN, the module with the same name is PREPENDED, and considered over the -! default one, causing a CHECKSUM error. !----------------------------------------- -! FRONTEND FLANG DRIVER (flang -fc1) +! FLANG DRIVER !----------------------------------------- -! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty -! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN -! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path=%S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN +! 
NOTE: Depending on how Flang is built, the default intrinsics may have higher +! or lower priority than -fintrinsic-modules-path added here. Using +! basictestmoduleone.mod from Inputs/module-dir/ will trigger an error. -! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found -! WITHOUT-NOT: 'iso_fortran_env.mod' was not found +! RUN: %flang -fsyntax-only -### %s 2>&1 | FileCheck %s --check-prefix=DEFAULTPATH -! GIVEN: error: Cannot use module file for module 'ieee_arithmetic': File has invalid checksum -! GIVEN: error: Cannot use module file for module 'iso_fortran_env': File has invalid checksum +! RUN: %flang -fsyntax-only -DINTRINSICS_DEFAULT %s +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTONE %s 2>&1 | FileCheck %s --check-prefix=NOINPUTONE +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTTWO %s 2>&1 | FileCheck %s --check-prefix=NOINPUTTWO +! RUN: %flang -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: %flang -fsyntax-only -DINTRINSICS_INPUTONE -fintrinsic-modules-path=%S/Inputs/ %s +! RUN: %flang -fsyntax-only -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/ -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir/ -fintrinsic-modules-path=%S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=WRONGINPUTONE +!----------------------------------------- +! FLANG FRONTEND (flang -fc1) +!----------------------------------------- +! NOTE: %flang_cc1 the default intrinsics path always has higher priority than +! -fintrinsic-modules-path added here. Accidentally using +! ieee_arithmetic/iso_fortran_env from the Inputs/ directory will trigger +! an error (e.g. when the default intrinsics dir is empty). + +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT %s +! 
RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE %s 2>&1 | FileCheck %s --check-prefix=NOINPUTONE +! RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO %s 2>&1 | FileCheck %s --check-prefix=NOINPUTTWO +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir %s +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -fintrinsic-modules-path=%S/Inputs/ %s +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/ -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir -fintrinsic-modules-path=%S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=WRONGINPUTONE + + +! DEFAULTPATH: flang{{.*}}-fc1{{.*}}-fintrinsic-modules-path + +! NOINPUTONE: Source file 'basictestmoduleone.mod' was not found +! NOINPUTTWO: Source file 'basictestmoduletwo.mod' was not found +! 
WRONGINPUTONE: 't1' not found in module 'basictestmoduleone' + program test_intrinsic_module_path +#ifdef INTRINSICS_DEFAULT use ieee_arithmetic, only: ieee_round_type use iso_fortran_env, only: team_type, event_type, lock_type +#endif +#ifdef INTRINSICS_INPUTONE + use basictestmoduleone, only: t1 +#endif +#ifdef INTRINSICS_INPUTTWO + use basictestmoduletwo, only: t2 +#endif end program diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index bbf9a5a9f277f..ba1e5d55b503c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -180,7 +180,7 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly + # Do not implicitly add intrinsic search paths ToolSubst( "%bbc_bare", command=FindTool("bbc"), From openmp-commits at lists.llvm.org Tue Jul 22 07:54:21 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 22 Jul 2025 07:54:21 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687fa61d.630a0220.232362.8183@mx.google.com> https://github.com/mhalk updated https://github.com/llvm/llvm-project/pull/147381 >From e0195f8b1b567333ee08453a1ba4c01e3513e301 Mon Sep 17 00:00:00 2001 From: Michael Halkenhaeuser Date: Wed, 2 Jul 2025 05:32:04 -0500 Subject: [PATCH] [OpenMP] Add ompTest library to OpenMP Description =========== OpenMP Tooling Interface Testing Library (ompTest) ompTest is a unit testing framework for testing OpenMP implementations. It offers a simple-to-use framework that allows a tester to check for OMPT events in addition to regular unit testing code, supported by linking against GoogleTest by default. It also facilitates writing concise tests while bridging the semantic gap between the unit under test and the OMPT-event testing. 
Background ========== This library has been developed to provide the means of testing OMPT implementations with reasonable effort. Especially, asynchronous or unordered events are supported and can be verified with ease, which may prove to be challenging with LIT-based tests. Additionally, since the assertions are part of the code being tested, ompTest can reference all corresponding variables during assertion. Basic Usage =========== OMPT event assertions are placed before the code, which shall be tested. These assertions can either be provided as one block or interleaved with the test code. There are two types of asserters: (1) sequenced "order-sensitive" and (2) set "unordered" asserters. Once the test is being run, the corresponding events are triggered by the OpenMP runtime and can be observed. Each of these observed events notifies asserters, which then determine if the test should pass or fail. Example (partial, interleaved) ============================== int N = 100000; int a[N]; int b[N]; OMPT_ASSERT_SEQUENCE(Target, TARGET, BEGIN, 0); OMPT_ASSERT_SEQUENCE(TargetDataOp, ALLOC, N * sizeof(int)); // a ? OMPT_ASSERT_SEQUENCE(TargetDataOp, H2D, N * sizeof(int), &a); OMPT_ASSERT_SEQUENCE(TargetDataOp, ALLOC, N * sizeof(int)); // b ? OMPT_ASSERT_SEQUENCE(TargetDataOp, H2D, N * sizeof(int), &b); OMPT_ASSERT_SEQUENCE(TargetSubmit, 1); OMPT_ASSERT_SEQUENCE(TargetDataOp, D2H, N * sizeof(int), nullptr, &b); OMPT_ASSERT_SEQUENCE(TargetDataOp, D2H, N * sizeof(int), nullptr, &a); OMPT_ASSERT_SEQUENCE(TargetDataOp, DELETE); OMPT_ASSERT_SEQUENCE(TargetDataOp, DELETE); OMPT_ASSERT_SEQUENCE(Target, TARGET, END, 0); { for (int j = 0; j < N; j++) a[j] = b[j]; } References ========== This work has been presented at SC'24 workshops, see: https://ieeexplore.ieee.org/document/10820689 Current State and Future Work ============================= ompTest's development was mostly device-centric and aimed at OMPT device callbacks and device-side tracing. 
Consequentially, a substantial part of host-related events or features may not be supported in its current state. However, we are confident that the related functionality can be added and ompTest provides a general foundation for future OpenMP and especially OMPT testing. This PR will allow us to upstream the corresponding features, like OMPT device-side tracing in the future with significantly reduced risk of introducing regressions in the process. Build ===== ompTest is linked against LLVM's GoogleTest by default, but can also be built 'standalone'. Additionally, it comes with a set of unit tests, which in turn require GoogleTest (overriding a standalone build). The unit tests are added to the `check-openmp` target. Use the following parameters to perform the corresponding build: `LIBOMPTEST_BUILD_STANDALONE` (Default: OFF) `LIBOMPTEST_BUILD_UNITTESTS` (Default: OFF) --------- Co-authored-by: Jan-Patrick Lehr Co-authored-by: Joachim --- openmp/README.rst | 1 + openmp/tools/omptest/CMakeLists.txt | 117 ++++ openmp/tools/omptest/README.md | 279 +++++++++ .../omptest/cmake/omptest-config.cmake.in | 30 + openmp/tools/omptest/include/AssertMacros.h | 138 ++++ openmp/tools/omptest/include/InternalEvent.h | 436 +++++++++++++ .../omptest/include/InternalEventCommon.h | 128 ++++ openmp/tools/omptest/include/Logging.h | 155 +++++ openmp/tools/omptest/include/OmptAliases.h | 85 +++ .../tools/omptest/include/OmptAssertEvent.h | 377 +++++++++++ openmp/tools/omptest/include/OmptAsserter.h | 291 +++++++++ .../omptest/include/OmptCallbackHandler.h | 165 +++++ openmp/tools/omptest/include/OmptTester.h | 60 ++ .../tools/omptest/include/OmptTesterGlobals.h | 36 ++ .../omptest/include/OmptTesterGoogleTest.h | 86 +++ .../omptest/include/OmptTesterStandalone.h | 123 ++++ openmp/tools/omptest/src/InternalEvent.cpp | 367 +++++++++++ .../omptest/src/InternalEventOperators.cpp | 317 ++++++++++ openmp/tools/omptest/src/Logging.cpp | 177 ++++++ openmp/tools/omptest/src/OmptAssertEvent.cpp | 
587 ++++++++++++++++++ openmp/tools/omptest/src/OmptAsserter.cpp | 477 ++++++++++++++ .../tools/omptest/src/OmptCallbackHandler.cpp | 445 +++++++++++++ openmp/tools/omptest/src/OmptTester.cpp | 504 +++++++++++++++ .../omptest/src/OmptTesterStandalone.cpp | 147 +++++ openmp/tools/omptest/test/CMakeLists.txt | 28 + openmp/tools/omptest/test/lit.cfg | 26 + openmp/tools/omptest/test/lit.site.cfg.in | 9 + .../test/unittests/asserter-seq-test.cpp | 358 +++++++++++ .../test/unittests/internal-event-test.cpp | 530 ++++++++++++++++ .../test/unittests/internal-util-test.cpp | 95 +++ .../omptest/test/unittests/main-test.cpp | 141 +++++ 31 files changed, 6715 insertions(+) create mode 100644 openmp/tools/omptest/CMakeLists.txt create mode 100644 openmp/tools/omptest/README.md create mode 100644 openmp/tools/omptest/cmake/omptest-config.cmake.in create mode 100644 openmp/tools/omptest/include/AssertMacros.h create mode 100644 openmp/tools/omptest/include/InternalEvent.h create mode 100644 openmp/tools/omptest/include/InternalEventCommon.h create mode 100644 openmp/tools/omptest/include/Logging.h create mode 100644 openmp/tools/omptest/include/OmptAliases.h create mode 100644 openmp/tools/omptest/include/OmptAssertEvent.h create mode 100644 openmp/tools/omptest/include/OmptAsserter.h create mode 100644 openmp/tools/omptest/include/OmptCallbackHandler.h create mode 100644 openmp/tools/omptest/include/OmptTester.h create mode 100644 openmp/tools/omptest/include/OmptTesterGlobals.h create mode 100644 openmp/tools/omptest/include/OmptTesterGoogleTest.h create mode 100644 openmp/tools/omptest/include/OmptTesterStandalone.h create mode 100644 openmp/tools/omptest/src/InternalEvent.cpp create mode 100644 openmp/tools/omptest/src/InternalEventOperators.cpp create mode 100644 openmp/tools/omptest/src/Logging.cpp create mode 100644 openmp/tools/omptest/src/OmptAssertEvent.cpp create mode 100644 openmp/tools/omptest/src/OmptAsserter.cpp create mode 100644 
openmp/tools/omptest/src/OmptCallbackHandler.cpp create mode 100644 openmp/tools/omptest/src/OmptTester.cpp create mode 100644 openmp/tools/omptest/src/OmptTesterStandalone.cpp create mode 100644 openmp/tools/omptest/test/CMakeLists.txt create mode 100644 openmp/tools/omptest/test/lit.cfg create mode 100644 openmp/tools/omptest/test/lit.site.cfg.in create mode 100644 openmp/tools/omptest/test/unittests/asserter-seq-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/internal-event-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/internal-util-test.cpp create mode 100644 openmp/tools/omptest/test/unittests/main-test.cpp diff --git a/openmp/README.rst b/openmp/README.rst index 2dfc8630858b8..c34d3e8a40d7d 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -369,6 +369,7 @@ There are following check-* make targets for tests. - ``check-ompt`` (ompt tests under runtime/test/ompt) - ``check-ompt-multiplex`` (ompt multiplex tests under tools/multiplex/tests) +- ``check-ompt-omptest`` (ompt omptest tests under tools/omptest/tests) - ``check-libarcher`` (libarcher tests under tools/archer/tests) - ``check-libomp`` (libomp tests under runtime/test. This includes check-ompt tests too) - ``check-libomptarget-*`` (libomptarget tests for specific target under libomptarget/test) diff --git a/openmp/tools/omptest/CMakeLists.txt b/openmp/tools/omptest/CMakeLists.txt new file mode 100644 index 0000000000000..d8b33bd4de0ee --- /dev/null +++ b/openmp/tools/omptest/CMakeLists.txt @@ -0,0 +1,117 @@ +##===----------------------------------------------------------------------===## +# +# Build OMPT unit testing library: ompTest +# +##===----------------------------------------------------------------------===## + +cmake_minimum_required(VERSION 3.22) +project(omptest LANGUAGES CXX) + +option(LIBOMPTEST_BUILD_STANDALONE + "Build ompTest 'standalone', i.e. w/o GoogleTest." 
+ ${OPENMP_STANDALONE_BUILD}) +option(LIBOMPTEST_BUILD_UNITTESTS + "Build ompTest's unit tests , requires GoogleTest." OFF) + +# In absence of corresponding OMPT support: exit early +if(NOT ${LIBOMP_OMPT_SUPPORT}) + return() +endif() + +set(OMPTEST_HEADERS + ./include/AssertMacros.h + ./include/InternalEvent.h + ./include/InternalEventCommon.h + ./include/Logging.h + ./include/OmptAliases.h + ./include/OmptAsserter.h + ./include/OmptAssertEvent.h + ./include/OmptCallbackHandler.h + ./include/OmptTester.h + ./include/OmptTesterGlobals.h +) + +add_library(omptest + SHARED + + ${OMPTEST_HEADERS} + ./src/InternalEvent.cpp + ./src/InternalEventOperators.cpp + ./src/Logging.cpp + ./src/OmptAsserter.cpp + ./src/OmptAssertEvent.cpp + ./src/OmptCallbackHandler.cpp + ./src/OmptTester.cpp +) + +# Target: ompTest library +# On (implicit) request of GoogleTest, link against the one provided with LLVM. +if ((NOT LIBOMPTEST_BUILD_STANDALONE) OR LIBOMPTEST_BUILD_UNITTESTS) + # Check if standalone build was requested together with unittests + if (LIBOMPTEST_BUILD_STANDALONE) + # Emit warning: this build actually depends on LLVM's GoogleTest + message(WARNING "LIBOMPTEST_BUILD_STANDALONE and LIBOMPTEST_BUILD_UNITTESTS" + " requested simultaneously.\n" + "Linking against LLVM's GoogleTest library archives.\n" + "Disable LIBOMPTEST_BUILD_UNITTESTS to perform an actual" + " standalone build.") + # Explicitly disable LIBOMPTEST_BUILD_STANDALONE + set(LIBOMPTEST_BUILD_STANDALONE OFF) + endif() + + # Use LLVM's gtest library archive + set(GTEST_LIB "${LLVM_BINARY_DIR}/lib/libllvm_gtest.a") + # Link gtest as whole-archive to expose required symbols + set(GTEST_LINK_CMD "-Wl,--whole-archive" ${GTEST_LIB} + "-Wl,--no-whole-archive" LLVMSupport) + + # Add GoogleTest-based header + target_sources(omptest PRIVATE ./include/OmptTesterGoogleTest.h) + + # Add LLVM-provided GoogleTest include directories. 
+ target_include_directories(omptest PRIVATE + ${LLVM_THIRD_PARTY_DIR}/unittest/googletest/include) + + # TODO: Re-visit ABI breaking checks, disable for now. + target_compile_definitions(omptest PUBLIC + -DLLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING) + + # Link against gtest and gtest_main + target_link_libraries(omptest PRIVATE ${GTEST_LINK_CMD}) +else() + # Add 'standalone' compile definitions + target_compile_definitions(omptest PRIVATE + -DOPENMP_LIBOMPTEST_BUILD_STANDALONE) + + # Add 'standalone' source files + target_sources(omptest PRIVATE + ./include/OmptTesterStandalone.h + ./src/OmptTesterStandalone.cpp) +endif() + +# Add common include directories. +target_include_directories(omptest PRIVATE + ./include + ${LIBOMPTARGET_INCLUDE_DIR}) +target_compile_features(omptest PRIVATE cxx_std_17) + +# Create and install package configuration files. +configure_file( + ${omptest_SOURCE_DIR}/cmake/omptest-config.cmake.in + ${omptest_BINARY_DIR}/cmake/omptest-config.cmake @ONLY) + +install(FILES ${omptest_BINARY_DIR}/cmake/omptest-config.cmake + DESTINATION "${OPENMP_INSTALL_LIBDIR}/cmake/openmp/omptest") + +# Install libomptest header files: Copy header-files from include dir +install(DIRECTORY ./include + DESTINATION "${LIBOMP_HEADERS_INSTALL_PATH}/omptest" + FILES_MATCHING PATTERN "*.h") + +install(TARGETS omptest LIBRARY COMPONENT omptest + DESTINATION "${OPENMP_INSTALL_LIBDIR}") + +# Discover unit tests (added to check-openmp) +if(LIBOMPTEST_BUILD_UNITTESTS) + add_subdirectory(test) +endif() diff --git a/openmp/tools/omptest/README.md b/openmp/tools/omptest/README.md new file mode 100644 index 0000000000000..bfed871b59bdb --- /dev/null +++ b/openmp/tools/omptest/README.md @@ -0,0 +1,279 @@ + +README for the OpenMP Tooling Interface Testing Library (ompTest) +================================================================= + +# Introduction +OpenMP Tooling Interface Testing Library (ompTest) +ompTest is a unit testing framework for testing OpenMP implementations. 
+It offers a simple-to-use framework that allows a tester to check for OMPT +events in addition to regular unit testing code, supported by linking against +GoogleTest by default. It also facilitates writing concise tests while bridging +the semantic gap between the unit under test and the OMPT-event testing. + +# Testing macros + +Corresponding macro definitions are located in: `./include/AssertMacros.h` + +## OMPT_GENERATE_EVENTS(NumberOfCopies, EventMacro) +`TODO` + +## OMPT_ASSERT_SET_EVENT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_GROUPED(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NAMED(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_EVENT_NOT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NOT(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_GROUPED_NOT(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SET_NAMED_NOT(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_EVENT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE(EventTy, ...) +This macro checks for the occurrence of the provided event, which also +entails the exact sequence of events. When only using this assertion macro one +has to provide every single event in the exact order of occurrence. + +## OMPT_ASSERT_SEQUENCE_GROUPED(Group, EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE` with the addition of grouping. + +## OMPT_ASSERT_SEQUENCE_NAMED(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_NOT(EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_GROUPED_NOT(Group, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_NAMED_NOT(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERT_SEQUENCE_SUSPEND() +`TODO` + +## OMPT_ASSERT_SEQUENCE_ONLY(EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE`, while actually being preceded +-AND- succeeded by commands to suspend sequenced assertion until the next match. 
+As a result, one may omit all other "unnecessary" events from the sequence. + +## OMPT_ASSERT_SEQUENCE_GROUPED_ONLY(Group, EventTy, ...) +This macro acts like `OMPT_ASSERT_SEQUENCE_ONLY`, plus grouping. + +## OMPT_ASSERT_SEQUENCE_NAMED_ONLY(Name, EventTy, ...) +`TODO` + +## OMPT_ASSERTER_MODE_STRICT(Asserter) +`TODO` + +## OMPT_ASSERTER_MODE_RELAXED(Asserter) +`TODO` + +## OMPT_ASSERT_SEQUENCE_MODE_STRICT() +`TODO` + +## OMPT_ASSERT_SEQUENCE_MODE_RELAXED() +`TODO` + +## OMPT_ASSERT_SET_MODE_STRICT() +`TODO` + +## OMPT_ASSERT_SET_MODE_RELAXED() +`TODO` + +## OMPT_ASSERTER_DISABLE(Asserter) +`TODO` + +## OMPT_ASSERTER_ENABLE(Asserter) +`TODO` + +## OMPT_ASSERT_SET_DISABLE() +`TODO` + +## OMPT_ASSERT_SET_ENABLE() +`TODO` + +## OMPT_ASSERT_SEQUENCE_DISABLE() +`TODO` + +## OMPT_ASSERT_SEQUENCE_ENABLE() +`TODO` + +## OMPT_REPORT_EVENT_DISABLE() +`TODO` + +## OMPT_REPORT_EVENT_ENABLE() +`TODO` + +## OMPT_ASSERTER_PERMIT_EVENT(Asserter, EventTy) +`TODO` + +## OMPT_ASSERTER_SUPPRESS_EVENT(Asserter, EventTy) +`TODO` + +## OMPT_PERMIT_EVENT(EventTy) +`TODO` + +## OMPT_SUPPRESS_EVENT(EventTy) +`TODO` + +## OMPT_ASSERTER_LOG_LEVEL(Asserter, LogLevel) +`TODO` + +## OMPT_ASSERTER_LOG_FORMATTED(Asserter, FormatLog) +`TODO` + +## OMPT_ASSERT_SYNC_POINT(SyncPointName) +`TODO` + +### Grouping Asserts + +Grouping allows data to be generated and verified during the runtime of a test. +Currently, we only use target region information which manifests into groups. +This makes it possible to correlate multiple events with a certain target region +without manual interaction, just by specifying a groupname for these events. + +When a target region is encountered and we are about to enter it, we gather the +`target_id` (non-EMI) -OR- `target_data->value` (EMI). This value is stored +along with the groupname for future reference. Upon target region end, the +corresponding group is erased. (Note: The groupname then becomes available again.) 
+ +Other asserted callbacks which may occur within target regions query their +groupname: retrieving and comparing the value of the group against the observed +event's value. + +### Suspending Sequenced Asserts + +When a sequence of events is not of interest while testing, these additional +events may be ignored by suspending the assertion until the next match. This +can be done by using `OMPT_ASSERT_SEQUENCE_SUSPEND` manually or the `_ONLY` +macro variants, like `OMPT_ASSERT_SEQUENCE_GROUPED_ONLY`. + +The former adds a special event to the queue of expected events and signals +that any non-matching event should be ignored rather than failing the test. +`_ONLY` macros embed their corresponding macro between two calls to +`OMPT_ASSERT_SEQUENCE_SUSPEND`. As a consequence, we enter passive assertion +until a match occurs, then enter passive assertion again. This enables us to +"only" assert a certain, single event in arbitrary circumstances. + +### Asserter Modes +`TODO` + +## Aliases (shorthands) +To allow for easier writing of tests and enhanced readability, the following set +of aliases is introduced. The left-hand side represents the original value, +while the right-hand side depicts the shorthand version. 
+ +| Type | Enum Value | Shorthand | +|---------------------------|---------------------------------------------|---------------------------| +| **ompt_scope_endpoint_t** | | | +| | ompt_scope_begin | BEGIN | +| | ompt_scope_end | END | +| | ompt_scope_beginend | BEGINEND | +| **ompt_target_t** | | | +| | ompt_target | TARGET | +| | ompt_target_enter_data | ENTER_DATA | +| | ompt_target_exit_data | EXIT_DATA | +| | ompt_target_update | UPDATE | +| | ompt_target_nowait | TARGET_NOWAIT | +| | ompt_target_enter_data_nowait | ENTER_DATA_NOWAIT | +| | ompt_target_exit_data_nowait | EXIT_DATA_NOWAIT | +| | ompt_target_update_nowait | UPDATE_NOWAIT | +| **ompt_target_data_op_t** | | | +| | ompt_target_data_alloc | ALLOC | +| | ompt_target_data_transfer_to_device | H2D | +| | ompt_target_data_transfer_from_device | D2H | +| | ompt_target_data_delete | DELETE | +| | ompt_target_data_associate | ASSOCIATE | +| | ompt_target_data_disassociate | DISASSOCIATE | +| | ompt_target_data_alloc_async | ALLOC_ASYNC | +| | ompt_target_data_transfer_to_device_async | H2D_ASYNC | +| | ompt_target_data_transfer_from_device_async | D2H_ASYNC | +| | ompt_target_data_delete_async | DELETE_ASYNC | +| **ompt_callbacks_t** | | | +| | ompt_callback_target | CB_TARGET | +| | ompt_callback_target_data_op | CB_DATAOP | +| | ompt_callback_target_submit | CB_KERNEL | +| **ompt_work_t** | | | +| | ompt_work_loop | WORK_LOOP | +| | ompt_work_sections | WORK_SECT | +| | ompt_work_single_executor | WORK_EXEC | +| | ompt_work_single_other | WORK_SINGLE | +| | ompt_work_workshare | WORK_SHARE | +| | ompt_work_distribute | WORK_DIST | +| | ompt_work_taskloop | WORK_TASK | +| | ompt_work_scope | WORK_SCOPE | +| | ompt_work_loop_static | WORK_LOOP_STA | +| | ompt_work_loop_dynamic | WORK_LOOP_DYN | +| | ompt_work_loop_guided | WORK_LOOP_GUI | +| | ompt_work_loop_other | WORK_LOOP_OTH | +| **ompt_sync_region_t** | | | +| | ompt_sync_region_barrier | SR_BARRIER | +| | ompt_sync_region_barrier_implicit | 
SR_BARRIER_IMPL | +| | ompt_sync_region_barrier_explicit | SR_BARRIER_EXPL | +| | ompt_sync_region_barrier_implementation | SR_BARRIER_IMPLEMENTATION | +| | ompt_sync_region_taskwait | SR_TASKWAIT | +| | ompt_sync_region_taskgroup | SR_TASKGROUP | +| | ompt_sync_region_reduction | SR_REDUCTION | +| | ompt_sync_region_barrier_implicit_workshare | SR_BARRIER_IMPL_WORKSHARE | +| | ompt_sync_region_barrier_implicit_parallel | SR_BARRIER_IMPL_PARALLEL | +| | ompt_sync_region_barrier_teams | SR_BARRIER_TEAMS | + + +Limitations +=========== +Currently, there are some peculiarities which have to be kept in mind when using +this library: + +## Callbacks + * It is not possible to e.g. test non-EMI -AND- EMI callbacks within the same + test file. Reason: all testsuites share the initialization and therefore the + registered callbacks. + * It is not possible to check for device initialization and/or load callbacks + more than once per test file. The first testcase being run triggers these + callbacks and is therefore the only testcase that is able to check for them. + This is because, after that, the device remains initialized. + * It is not possible to check for device finalization callbacks, as libomptest + is unloaded before this callback occurs. The same holds true for the final + ThreadEnd event(s). + +Miscellaneous +============= + +## Default values + +To allow for easier writing of tests, many OMPT events may be created using fewer +parameters than actually requested by the spec -- by using default values. These +defaults are currently set to the corresponding data type's minimum as follows, +for example integers use: `std::numeric_limits<int>::min()`. + +When an expected / user-specified event has certain values set to the +corresponding default, these values are ignored. That is, when compared to an +observed event, these properties are considered 'equal' regardless of their +actual equality relation. 
+ +References +========== +[0]: ompTest – Unit Testing with OMPT + https://doi.org/10.1109/SCW63240.2024.00031 + +[1]: OMPTBench – OpenMP Tool Interface Conformance Testing + https://doi.org/10.1109/SCW63240.2024.00036 diff --git a/openmp/tools/omptest/cmake/omptest-config.cmake.in b/openmp/tools/omptest/cmake/omptest-config.cmake.in new file mode 100644 index 0000000000000..bfcb8a72275eb --- /dev/null +++ b/openmp/tools/omptest/cmake/omptest-config.cmake.in @@ -0,0 +1,30 @@ +################################################################################ +## +## omptest cmake configuration file. +## Enable support for find_package(omptest). +## +################################################################################ + +# Compute LLVM installation prefix (root) relative to this file. +# Subsequent get_filename_component calls move up one directory level. (cf. /..) +get_filename_component(LLVM_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}" REALPATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) +get_filename_component(LLVM_INSTALL_PREFIX "${LLVM_INSTALL_PREFIX}" PATH) + +# Provide header and library paths. +set(LIBOMP_HEADERS_INSTALL_PATH "${LLVM_INSTALL_PREFIX}/@LIBOMP_HEADERS_INSTALL_PATH@") +set(LIBOMP_LIBRARY_INSTALL_PATH "${LLVM_INSTALL_PREFIX}/@OPENMP_INSTALL_LIBDIR@") +set(omptest_INCLUDE_DIR "${LIBOMP_HEADERS_INSTALL_PATH}/omptest/include") +set(omptest_LIBRARY_DIR "${LIBOMP_LIBRARY_INSTALL_PATH}") + +# Provide compiler default values. 
+set(LLVM_BIN_INSTALL_DIR "${LLVM_INSTALL_PREFIX}/bin") +set(omptest_C_COMPILER "${LLVM_BIN_INSTALL_DIR}/clang") +set(omptest_CXX_COMPILER "${LLVM_BIN_INSTALL_DIR}/clang++") + +# Provide information, if ompTest has been built 'standalone'. +set(LIBOMPTEST_BUILD_STANDALONE "@LIBOMPTEST_BUILD_STANDALONE@") diff --git a/openmp/tools/omptest/include/AssertMacros.h b/openmp/tools/omptest/include/AssertMacros.h new file mode 100644 index 0000000000000..d5d191c10dabb --- /dev/null +++ b/openmp/tools/omptest/include/AssertMacros.h @@ -0,0 +1,138 @@ +//===- AssertMacros.h - Macro aliases for ease-of-use -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides macros to be used in unit tests for OMPT events. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_ASSERTMACROS_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_ASSERTMACROS_H + +#define OMPTEST_EXCLUDED_EVENT omptest::ObserveState::never +#define OMPTEST_REQUIRED_EVENT omptest::ObserveState::always + +/// ASSERT MACROS TO BE USED BY THE USER + +#define OMPT_GENERATE_EVENTS(NumberOfCopies, EventMacro) \ + for (size_t i = 0; i < NumberOfCopies; ++i) { \ + EventMacro \ + } + +// Handle a minimum unordered set of events +// Required events +#define OMPT_ASSERT_SET_EVENT(Name, Group, EventTy, ...) \ + SetAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_REQUIRED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SET(EventTy, ...) \ + OMPT_ASSERT_SET_EVENT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_GROUPED(Group, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_NAMED(Name, EventTy, ...) 
\ + OMPT_ASSERT_SET_EVENT(Name, "", EventTy, __VA_ARGS__) +// Excluded ("NOT") events +#define OMPT_ASSERT_SET_EVENT_NOT(Name, Group, EventTy, ...) \ + SetAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_EXCLUDED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SET_NOT(EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_GROUPED_NOT(Group, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SET_NAMED_NOT(Name, EventTy, ...) \ + OMPT_ASSERT_SET_EVENT_NOT(Name, "", EventTy, __VA_ARGS__) + +// Handle an exact sequence of events +// Required events +#define OMPT_ASSERT_SEQUENCE_EVENT(Name, Group, EventTy, ...) \ + SequenceAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_REQUIRED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SEQUENCE(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_GROUPED(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_NAMED(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT(Name, "", EventTy, __VA_ARGS__) +// Excluded ("NOT") events +#define OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, Group, EventTy, ...) \ + SequenceAsserter->insert(OmptAssertEvent::EventTy( \ + Name, Group, OMPTEST_EXCLUDED_EVENT, __VA_ARGS__)); +#define OMPT_ASSERT_SEQUENCE_NOT(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT("", "", EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_GROUPED_NOT(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_EVENT_NOT("", Group, EventTy, __VA_ARGS__) +#define OMPT_ASSERT_SEQUENCE_NAMED_NOT(Name, EventTy, ...) 
\ + OMPT_ASSERT_SEQUENCE_EVENT_NOT(Name, "", EventTy, __VA_ARGS__) +// Special command: suspend active assertion +// The created event is not correlated to any observed event +#define OMPT_ASSERT_SEQUENCE_SUSPEND() \ + SequenceAsserter->insert( \ + OmptAssertEvent::AssertionSuspend("", "", OMPTEST_EXCLUDED_EVENT)); +#define OMPT_ASSERT_SEQUENCE_ONLY(EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT("", "", EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() +#define OMPT_ASSERT_SEQUENCE_GROUPED_ONLY(Group, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT("", Group, EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() +#define OMPT_ASSERT_SEQUENCE_NAMED_ONLY(Name, EventTy, ...) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() \ + OMPT_ASSERT_SEQUENCE_EVENT(Name, "", EventTy, __VA_ARGS__) \ + OMPT_ASSERT_SEQUENCE_SUSPEND() + +#define OMPT_ASSERTER_MODE_STRICT(Asserter) \ + Asserter->setOperationMode(AssertMode::strict); +#define OMPT_ASSERTER_MODE_RELAXED(Asserter) \ + Asserter->setOperationMode(AssertMode::relaxed); +#define OMPT_ASSERT_SEQUENCE_MODE_STRICT() \ + OMPT_ASSERTER_MODE_STRICT(SequenceAsserter) +#define OMPT_ASSERT_SEQUENCE_MODE_RELAXED() \ + OMPT_ASSERTER_MODE_RELAXED(SequenceAsserter) +#define OMPT_ASSERT_SET_MODE_STRICT() OMPT_ASSERTER_MODE_STRICT(SetAsserter) +#define OMPT_ASSERT_SET_MODE_RELAXED() OMPT_ASSERTER_MODE_RELAXED(SetAsserter) + +// Enable / disable asserters entirely +#define OMPT_ASSERTER_DISABLE(Asserter) Asserter->setActive(false); +#define OMPT_ASSERTER_ENABLE(Asserter) Asserter->setActive(true); +#define OMPT_ASSERT_SET_DISABLE() OMPT_ASSERTER_DISABLE(SetAsserter) +#define OMPT_ASSERT_SET_ENABLE() OMPT_ASSERTER_ENABLE(SetAsserter) +#define OMPT_ASSERT_SEQUENCE_DISABLE() OMPT_ASSERTER_DISABLE(SequenceAsserter) +#define OMPT_ASSERT_SEQUENCE_ENABLE() OMPT_ASSERTER_ENABLE(SequenceAsserter) +#define OMPT_REPORT_EVENT_DISABLE() OMPT_ASSERTER_DISABLE(EventReporter) +#define 
OMPT_REPORT_EVENT_ENABLE() OMPT_ASSERTER_ENABLE(EventReporter) + +// Enable / disable certain event types for asserters +#define OMPT_ASSERTER_PERMIT_EVENT(Asserter, EventTy) \ + Asserter->permitEvent(EventTy); +#define OMPT_ASSERTER_SUPPRESS_EVENT(Asserter, EventTy) \ + Asserter->suppressEvent(EventTy); +#define OMPT_PERMIT_EVENT(EventTy) \ + OMPT_ASSERTER_PERMIT_EVENT(SetAsserter, EventTy); \ + OMPT_ASSERTER_PERMIT_EVENT(EventReporter, EventTy); \ + OMPT_ASSERTER_PERMIT_EVENT(SequenceAsserter, EventTy); +#define OMPT_SUPPRESS_EVENT(EventTy) \ + OMPT_ASSERTER_SUPPRESS_EVENT(SetAsserter, EventTy); \ + OMPT_ASSERTER_SUPPRESS_EVENT(EventReporter, EventTy); \ + OMPT_ASSERTER_SUPPRESS_EVENT(SequenceAsserter, EventTy); + +// Set logging level for asserters +// Note: Logger is a singleton, hence this will affect all asserter instances +#define OMPT_ASSERTER_LOG_LEVEL(Asserter, LogLevel) \ + Asserter->getLog()->setLoggingLevel(LogLevel); + +// Set log formatting (esp. coloring) for asserters +// Note: Logger is a singleton, hence this will affect all asserter instances +#define OMPT_ASSERTER_LOG_FORMATTED(Asserter, FormatLog) \ + Asserter->getLog()->setFormatOutput(FormatLog); + +// SyncPoint handling +#define OMPT_ASSERT_SYNC_POINT(SyncPointName) \ + flush_traced_devices(); \ + OmptCallbackHandler::get().handleAssertionSyncPoint(SyncPointName); + +#endif diff --git a/openmp/tools/omptest/include/InternalEvent.h b/openmp/tools/omptest/include/InternalEvent.h new file mode 100644 index 0000000000000..1348c48f72005 --- /dev/null +++ b/openmp/tools/omptest/include/InternalEvent.h @@ -0,0 +1,436 @@ +//===- InternalEvent.h - Internal event representation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declares internal event representations along the default CTOR definition. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H + +#include "InternalEventCommon.h" + +#include +#include +#include + +#define expectedDefault(TypeName) std::numeric_limits::min() + +namespace omptest { + +namespace util { + +/// String manipulation helper function. Takes up to 8 bytes of data and returns +/// their hexadecimal representation as string. The data can be expanded to the +/// given size in bytes and will by default be prefixed with '0x'. +std::string makeHexString(uint64_t Data, bool IsPointer = true, + size_t DataBytes = 0, bool ShowHexBase = true); + +} // namespace util + +namespace internal { +struct AssertionSyncPoint : public EventBase { + std::string toString() const override; + AssertionSyncPoint(const std::string &Name) : Name(Name) {} + const std::string Name; +}; + +struct AssertionSuspend : public EventBase { + AssertionSuspend() = default; +}; + +struct ThreadBegin : public EventBase { + std::string toString() const override; + ThreadBegin(ompt_thread_t ThreadType) : ThreadType(ThreadType) {} + ompt_thread_t ThreadType; +}; + +struct ThreadEnd : public EventBase { + std::string toString() const override; + ThreadEnd() = default; +}; + +struct ParallelBegin : public EventBase { + std::string toString() const override; + ParallelBegin(int NumThreads) : NumThreads(NumThreads) {} + unsigned int NumThreads; +}; + +struct ParallelEnd : public EventBase { + std::string toString() const override; + ParallelEnd(ompt_data_t *ParallelData, ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) + : ParallelData(ParallelData), 
EncounteringTaskData(EncounteringTaskData), + Flags(Flags), CodeptrRA(CodeptrRA) {} + ompt_data_t *ParallelData; + ompt_data_t *EncounteringTaskData; + int Flags; + const void *CodeptrRA; +}; + +struct Work : public EventBase { + std::string toString() const override; + Work(ompt_work_t WorkType, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) + : WorkType(WorkType), Endpoint(Endpoint), ParallelData(ParallelData), + TaskData(TaskData), Count(Count), CodeptrRA(CodeptrRA) {} + ompt_work_t WorkType; + ompt_scope_endpoint_t Endpoint; + ompt_data_t *ParallelData; + ompt_data_t *TaskData; + uint64_t Count; + const void *CodeptrRA; +}; + +struct Dispatch : public EventBase { + std::string toString() const override; + Dispatch(ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance) + : ParallelData(ParallelData), TaskData(TaskData), Kind(Kind), + Instance(Instance) {} + ompt_data_t *ParallelData; + ompt_data_t *TaskData; + ompt_dispatch_t Kind; + ompt_data_t Instance; +}; + +struct TaskCreate : public EventBase { + std::string toString() const override; + TaskCreate(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *NewTaskData, int Flags, int HasDependences, + const void *CodeptrRA) + : EncounteringTaskData(EncounteringTaskData), + EncounteringTaskFrame(EncounteringTaskFrame), NewTaskData(NewTaskData), + Flags(Flags), HasDependences(HasDependences), CodeptrRA(CodeptrRA) {} + ompt_data_t *EncounteringTaskData; + const ompt_frame_t *EncounteringTaskFrame; + ompt_data_t *NewTaskData; + int Flags; + int HasDependences; + const void *CodeptrRA; +}; + +struct Dependences : public EventBase { + Dependences() = default; +}; + +struct TaskDependence : public EventBase { + TaskDependence() = default; +}; + +struct TaskSchedule : public EventBase { + TaskSchedule() = default; +}; + +struct ImplicitTask : public EventBase { + 
std::string toString() const override; + ImplicitTask(ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, unsigned int ActualParallelism, + unsigned int Index, int Flags) + : Endpoint(Endpoint), ParallelData(ParallelData), TaskData(TaskData), + ActualParallelism(ActualParallelism), Index(Index), Flags(Flags) {} + ompt_scope_endpoint_t Endpoint; + ompt_data_t *ParallelData; + ompt_data_t *TaskData; + unsigned int ActualParallelism; + unsigned int Index; + int Flags; +}; + +struct Masked : public EventBase { + Masked() = default; +}; + +struct SyncRegion : public EventBase { + std::string toString() const override; + SyncRegion(ompt_sync_region_t Kind, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + const void *CodeptrRA) + : Kind(Kind), Endpoint(Endpoint), ParallelData(ParallelData), + TaskData(TaskData), CodeptrRA(CodeptrRA) {} + ompt_sync_region_t Kind; + ompt_scope_endpoint_t Endpoint; + ompt_data_t *ParallelData; + ompt_data_t *TaskData; + const void *CodeptrRA; +}; + +struct MutexAcquire : public EventBase { + MutexAcquire() = default; +}; + +struct Mutex : public EventBase { + Mutex() = default; +}; + +struct NestLock : public EventBase { + NestLock() = default; +}; + +struct Flush : public EventBase { + Flush() = default; +}; + +struct Cancel : public EventBase { + Cancel() = default; +}; + +struct Target : public EventBase { + std::string toString() const override; + Target(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_id_t TargetId, const void *CodeptrRA) + : Kind(Kind), Endpoint(Endpoint), DeviceNum(DeviceNum), + TaskData(TaskData), TargetId(TargetId), CodeptrRA(CodeptrRA) {} + ompt_target_t Kind; + ompt_scope_endpoint_t Endpoint; + int DeviceNum; + ompt_data_t *TaskData; + ompt_id_t TargetId; + const void *CodeptrRA; +}; + +struct TargetEmi : public EventBase { + std::string toString() const override; + TargetEmi(ompt_target_t Kind, 
ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, const void *CodeptrRA) + : Kind(Kind), Endpoint(Endpoint), DeviceNum(DeviceNum), + TaskData(TaskData), TargetTaskData(TargetTaskData), + TargetData(TargetData), CodeptrRA(CodeptrRA) {} + ompt_target_t Kind; + ompt_scope_endpoint_t Endpoint; + int DeviceNum; + ompt_data_t *TaskData; + ompt_data_t *TargetTaskData; + ompt_data_t *TargetData; + const void *CodeptrRA; +}; + +struct TargetDataOp : public EventBase { + std::string toString() const override; + TargetDataOp(ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, + const void *CodeptrRA) + : TargetId(TargetId), HostOpId(HostOpId), OpType(OpType), + SrcAddr(SrcAddr), SrcDeviceNum(SrcDeviceNum), DstAddr(DstAddr), + DstDeviceNum(DstDeviceNum), Bytes(Bytes), CodeptrRA(CodeptrRA) {} + ompt_id_t TargetId; + ompt_id_t HostOpId; + ompt_target_data_op_t OpType; + void *SrcAddr; + int SrcDeviceNum; + void *DstAddr; + int DstDeviceNum; + size_t Bytes; + const void *CodeptrRA; +}; + +struct TargetDataOpEmi : public EventBase { + std::string toString() const override; + TargetDataOpEmi(ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, + const void *CodeptrRA) + : Endpoint(Endpoint), TargetTaskData(TargetTaskData), + TargetData(TargetData), HostOpId(HostOpId), OpType(OpType), + SrcAddr(SrcAddr), SrcDeviceNum(SrcDeviceNum), DstAddr(DstAddr), + DstDeviceNum(DstDeviceNum), Bytes(Bytes), CodeptrRA(CodeptrRA) {} + ompt_scope_endpoint_t Endpoint; + ompt_data_t *TargetTaskData; + ompt_data_t *TargetData; + ompt_id_t *HostOpId; + ompt_target_data_op_t OpType; + void *SrcAddr; + int SrcDeviceNum; + void *DstAddr; + int DstDeviceNum; + 
size_t Bytes; + const void *CodeptrRA; +}; + +struct TargetSubmit : public EventBase { + std::string toString() const override; + TargetSubmit(ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams) + : TargetId(TargetId), HostOpId(HostOpId), + RequestedNumTeams(RequestedNumTeams) {} + ompt_id_t TargetId; + ompt_id_t HostOpId; + unsigned int RequestedNumTeams; +}; + +struct TargetSubmitEmi : public EventBase { + std::string toString() const override; + TargetSubmitEmi(ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetData, + ompt_id_t *HostOpId, unsigned int RequestedNumTeams) + : Endpoint(Endpoint), TargetData(TargetData), HostOpId(HostOpId), + RequestedNumTeams(RequestedNumTeams) {} + ompt_scope_endpoint_t Endpoint; + ompt_data_t *TargetData; + ompt_id_t *HostOpId; + unsigned int RequestedNumTeams; +}; + +struct ControlTool : public EventBase { + ControlTool() = default; +}; + +struct DeviceInitialize : public EventBase { + std::string toString() const override; + DeviceInitialize(int DeviceNum, const char *Type, ompt_device_t *Device, + ompt_function_lookup_t LookupFn, const char *DocStr) + : DeviceNum(DeviceNum), Type(Type), Device(Device), LookupFn(LookupFn), + DocStr(DocStr) {} + int DeviceNum; + const char *Type; + ompt_device_t *Device; + ompt_function_lookup_t LookupFn; + const char *DocStr; +}; + +struct DeviceFinalize : public EventBase { + std::string toString() const override; + DeviceFinalize(int DeviceNum) : DeviceNum(DeviceNum) {} + int DeviceNum; +}; + +struct DeviceLoad : public EventBase { + std::string toString() const override; + DeviceLoad(int DeviceNum, const char *Filename, int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) + : DeviceNum(DeviceNum), Filename(Filename), OffsetInFile(OffsetInFile), + VmaInFile(VmaInFile), Bytes(Bytes), HostAddr(HostAddr), + DeviceAddr(DeviceAddr), ModuleId(ModuleId) {} + int DeviceNum; + const char *Filename; + int64_t OffsetInFile; 
+ void *VmaInFile; + size_t Bytes; + void *HostAddr; + void *DeviceAddr; + uint64_t ModuleId; +}; + +struct DeviceUnload : public EventBase { + DeviceUnload() = default; +}; + +struct BufferRequest : public EventBase { + std::string toString() const override; + BufferRequest(int DeviceNum, ompt_buffer_t **Buffer, size_t *Bytes) + : DeviceNum(DeviceNum), Buffer(Buffer), Bytes(Bytes) {} + int DeviceNum; + ompt_buffer_t **Buffer; + size_t *Bytes; +}; + +struct BufferComplete : public EventBase { + std::string toString() const override; + BufferComplete(int DeviceNum, ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned) + : DeviceNum(DeviceNum), Buffer(Buffer), Bytes(Bytes), Begin(Begin), + BufferOwned(BufferOwned) {} + int DeviceNum; + ompt_buffer_t *Buffer; + size_t Bytes; + ompt_buffer_cursor_t Begin; + int BufferOwned; +}; + +struct BufferRecord : public EventBase { + std::string toString() const override; + BufferRecord(ompt_record_ompt_t *RecordPtr) : RecordPtr(RecordPtr) { + if (RecordPtr != nullptr) + Record = *RecordPtr; + else + memset(&Record, 0, sizeof(ompt_record_ompt_t)); + } + ompt_record_ompt_t Record; + ompt_record_ompt_t *RecordPtr; +}; + +struct BufferRecordDeallocation : public EventBase { + std::string toString() const override; + BufferRecordDeallocation(ompt_buffer_t *Buffer) : Buffer(Buffer) {} + ompt_buffer_t *Buffer; +}; + +// Add specialized event equality operators here. +// Note: Placement of these forward declarations is important as they need to +// take precedence over the following default equality operator definition. 
+bool operator==(const ParallelBegin &, const ParallelBegin &); +bool operator==(const Work &, const Work &); +bool operator==(const ImplicitTask &, const ImplicitTask &); +bool operator==(const SyncRegion &, const SyncRegion &); +bool operator==(const Target &, const Target &); +bool operator==(const TargetEmi &, const TargetEmi &); +bool operator==(const TargetDataOp &, const TargetDataOp &); +bool operator==(const TargetDataOpEmi &, const TargetDataOpEmi &); +bool operator==(const TargetSubmit &, const TargetSubmit &); +bool operator==(const TargetSubmitEmi &, const TargetSubmitEmi &); +bool operator==(const DeviceInitialize &, const DeviceInitialize &); +bool operator==(const DeviceFinalize &, const DeviceFinalize &); +bool operator==(const DeviceLoad &, const DeviceLoad &); +bool operator==(const BufferRequest &, const BufferRequest &); +bool operator==(const BufferComplete &, const BufferComplete &); +bool operator==(const BufferRecord &, const BufferRecord &); + +/// Default (fallback) event equality operator definition. 
+template bool operator==(const Event &, const Event &) { + return true; +} + +// clang-format off +event_type_trait(AssertionSyncPoint) +event_type_trait(AssertionSuspend) +event_type_trait(ThreadBegin) +event_type_trait(ThreadEnd) +event_type_trait(ParallelBegin) +event_type_trait(ParallelEnd) +event_type_trait(Work) +event_type_trait(Dispatch) +event_type_trait(TaskCreate) +event_type_trait(Dependences) +event_type_trait(TaskDependence) +event_type_trait(TaskSchedule) +event_type_trait(ImplicitTask) +event_type_trait(Masked) +event_type_trait(SyncRegion) +event_type_trait(MutexAcquire) +event_type_trait(Mutex) +event_type_trait(NestLock) +event_type_trait(Flush) +event_type_trait(Cancel) +event_type_trait(Target) +event_type_trait(TargetEmi) +event_type_trait(TargetDataOp) +event_type_trait(TargetDataOpEmi) +event_type_trait(TargetSubmit) +event_type_trait(TargetSubmitEmi) +event_type_trait(ControlTool) +event_type_trait(DeviceInitialize) +event_type_trait(DeviceFinalize) +event_type_trait(DeviceLoad) +event_type_trait(DeviceUnload) +event_type_trait(BufferRequest) +event_type_trait(BufferComplete) +event_type_trait(BufferRecord) +event_type_trait(BufferRecordDeallocation) +// clang-format on + +} // namespace internal + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/InternalEventCommon.h b/openmp/tools/omptest/include/InternalEventCommon.h new file mode 100644 index 0000000000000..5c0e54edf15c2 --- /dev/null +++ b/openmp/tools/omptest/include/InternalEventCommon.h @@ -0,0 +1,128 @@ +//===- InternalEventCommon.h - Common internal event basics -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides event types, and class/operator declaration macros. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENTCOMMON_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENTCOMMON_H + +#include "omp-tools.h" + +#include +#include + +namespace omptest { + +namespace internal { +/// Enum values are used for comparison of observed and asserted events +/// List is based on OpenMP 5.2 specification, table 19.2 (page 447) +enum class EventTy { + None, // not part of OpenMP spec, used for implementation + AssertionSyncPoint, // not part of OpenMP spec, used for implementation + AssertionSuspend, // not part of OpenMP spec, used for implementation + BufferRecord, // not part of OpenMP spec, used for implementation + BufferRecordDeallocation, // not part of OpenMP spec, used for implementation + ThreadBegin, + ThreadEnd, + ParallelBegin, + ParallelEnd, + Work, + Dispatch, + TaskCreate, // TODO: Implement + Dependences, // TODO: Implement + TaskDependence, // TODO: Implement + TaskSchedule, // TODO: Implement + ImplicitTask, // TODO: Implement + Masked, // TODO: Implement + SyncRegion, + MutexAcquire, // TODO: Implement + Mutex, // TODO: Implement + NestLock, // TODO: Implement + Flush, // TODO: Implement + Cancel, // TODO: Implement + DeviceInitialize, + DeviceFinalize, + DeviceLoad, + DeviceUnload, + BufferRequest, + BufferComplete, + TargetDataOp, + TargetDataOpEmi, + Target, + TargetEmi, + TargetSubmit, + TargetSubmitEmi, + ControlTool +}; + +struct InternalEvent { + EventTy Type; + EventTy getType() const { return Type; } + + InternalEvent() : Type(EventTy::None) {} + InternalEvent(EventTy T) : Type(T) {} + virtual ~InternalEvent() = default; + + virtual bool equals(const InternalEvent *o) const { + assert(false && "Base class implementation"); + return false; + }; + + virtual std::string toString() const { + std::string S{"InternalEvent: Type="}; + S.append(std::to_string((uint32_t)Type)); + return S; + } +}; + +/// Specialize EventType 
member for each derived internal event type. +/// Effectively selecting an event type as initialization value. +template struct EventTypeOf; + +/// Actual definition macro for EventTypeOf. +#define event_type_trait(EvTy) \ + template <> struct EventTypeOf { \ + static constexpr EventTy Value = EventTy::EvTy; \ + }; + +/// CRTP (Curiously Recurring Template Pattern) intermediate class +/// Adding a new event type can be achieved by inheriting from an EventBase +/// template instantiation of the new class' name, like this: +/// struct NewEventType : public EventBase +template class EventBase : public InternalEvent { +public: + static constexpr EventTy EventType = EventTypeOf::Value; + EventBase() : InternalEvent(EventType) {} + virtual ~EventBase() = default; + + /// Equals method to cast and dispatch to the specific class operator== + virtual bool equals(const InternalEvent *o) const override { + // Note: When the if-condition evaluates to true, the event types are + // trivially identical. Otherwise, a cast to the Derived pointer would have + // been impossible. + if (const auto Other = dynamic_cast(o)) + return operator==(*static_cast(this), *Other); + return false; + } + + /// Basic toString method, which may be overridden with own implementations. + virtual std::string toString() const override { + std::string S{"EventBase: Type="}; + S.append(std::to_string((uint32_t)Type)); + return S; + } +}; + +} // namespace internal + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/Logging.h b/openmp/tools/omptest/include/Logging.h new file mode 100644 index 0000000000000..0104191b1d15f --- /dev/null +++ b/openmp/tools/omptest/include/Logging.h @@ -0,0 +1,155 @@ +//===- Logging.h - General logging class ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Provides ompTest-tailored logging, with log-levels and formatting/coloring. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_LOGGING_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_LOGGING_H + +#include "OmptAssertEvent.h" + +#include +#include +#include +#include +#include +#include + +namespace omptest { +namespace logging { + +enum class Level : uint32_t { + // Levels (Note: DEBUG may already be reserved) + DIAGNOSTIC = 10, + INFO = 20, + WARNING = 30, + ERROR = 40, + CRITICAL = 50, + + // Types used for formatting options + Default, + ExpectedEvent, + ObservedEvent, + OffendingEvent, + + // Suppress all prints + SILENT = 0xFFFFFFFF +}; + +enum class FormatOption : uint32_t { + // General options + // Note: BOLD is actually "BRIGHT" -- But it will be perceived as 'bold' font + // It is implicitly switching colors to the 'Light' variant + // Thus, it has -NO EFFECT- when already using a Light* color + NONE = 0, + BOLD = 1, + DIM = 2, + UNDERLINED = 4, + BLINK = 5, + INVERTED = 7, + HIDDEN = 8, + // Foreground colors + COLOR_Default = 39, + COLOR_Black = 30, + COLOR_Red = 31, + COLOR_Green = 32, + COLOR_Yellow = 33, + COLOR_Blue = 34, + COLOR_Magenta = 35, + COLOR_Cyan = 36, + COLOR_LightGray = 37, + COLOR_DarkGray = 90, + COLOR_LightRed = 91, + COLOR_LightGreen = 92, + COLOR_LightYellow = 93, + COLOR_LightBlue = 94, + COLOR_LightMagenta = 95, + COLOR_LightCyan = 96, + COLOR_White = 97, + // Background colors + COLOR_BG_Default = 49, + COLOR_BG_Black = 40, + COLOR_BG_Red = 41, + COLOR_BG_Green = 42, + COLOR_BG_Yellow = 43, + COLOR_BG_Blue = 44, + COLOR_BG_Magenta = 45, + COLOR_BG_Cyan = 46, + COLOR_BG_LightGray = 47, + COLOR_BG_DarkGray = 100, + COLOR_BG_LightRed = 101, + COLOR_BG_LightGreen = 102, + COLOR_BG_LightYellow = 103, + 
COLOR_BG_LightBlue = 104, + COLOR_BG_LightMagenta = 105, + COLOR_BG_LightCyan = 106, + COLOR_BG_White = 107 +}; + +/// Returns a string representation of the given logging level. +const char *to_string(Level LogLevel); + +/// Returns the format options as escaped sequence, for the given logging level +std::string getFormatSequence(Level LogLevel = Level::Default); + +/// Format the given message with the provided option(s) and return it. +/// Here formatting is only concerning control sequences using character +/// which can be obtained using '\e' (on console), '\033' or '\x1B'. +std::string format(const std::string &Message, FormatOption Option); +std::string format(const std::string &Message, std::set Options); + +class Logger { +public: + Logger(Level LogLevel = Level::WARNING, std::ostream &OutStream = std::cerr, + bool FormatOutput = true); + ~Logger(); + + /// Log the given message to the output. + void log(Level LogLevel, const std::string &Message) const; + + /// Log a single event mismatch. + void eventMismatch(const omptest::OmptAssertEvent &OffendingEvent, + const std::string &Message, + Level LogLevel = Level::ERROR) const; + + /// Log an event-pair mismatch. + void eventMismatch(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent, + const std::string &Message, + Level LogLevel = Level::ERROR) const; + + /// Set if output is being formatted (e.g. colored). + void setFormatOutput(bool Enabled); + + /// Return the current (minimum) Logging Level. + Level getLoggingLevel() const; + + /// Set the (minimum) Logging Level. + void setLoggingLevel(Level LogLevel); + +private: + /// The minimum logging level that is considered by the logger instance. + Level LoggingLevel; + + /// The output stream used by the logger instance. + std::ostream &OutStream; + + /// Determine if log messages are formatted using control sequences. 
+ bool FormatOutput; + + /// Mutex to ensure serialized logging + mutable std::mutex LogMutex; +}; + +} // namespace logging +} // namespace omptest + +#endif \ No newline at end of file diff --git a/openmp/tools/omptest/include/OmptAliases.h b/openmp/tools/omptest/include/OmptAliases.h new file mode 100644 index 0000000000000..500be5ef9f749 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAliases.h @@ -0,0 +1,85 @@ +//===- OmptAliases.h - Shorthand aliases for OMPT enum values ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines shorthand aliases for OMPT enum values, providing improved +/// ease-of-use and readability. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTALIASES_H + +#include + +/// Aliases for enum: ompt_scope_endpoint_t +constexpr ompt_scope_endpoint_t BEGIN = ompt_scope_begin; +constexpr ompt_scope_endpoint_t END = ompt_scope_end; +constexpr ompt_scope_endpoint_t BEGINEND = ompt_scope_beginend; + +/// Aliases for enum: ompt_target_t +constexpr ompt_target_t TARGET = ompt_target; +constexpr ompt_target_t ENTER_DATA = ompt_target_enter_data; +constexpr ompt_target_t EXIT_DATA = ompt_target_exit_data; +constexpr ompt_target_t UPDATE = ompt_target_update; +constexpr ompt_target_t TARGET_NOWAIT = ompt_target_nowait; +constexpr ompt_target_t ENTER_DATA_NOWAIT = ompt_target_enter_data_nowait; +constexpr ompt_target_t EXIT_DATA_NOWAIT = ompt_target_exit_data_nowait; +constexpr ompt_target_t UPDATE_NOWAIT = ompt_target_update_nowait; + +/// Aliases for enum: ompt_target_data_op_t +constexpr ompt_target_data_op_t ALLOC = 
ompt_target_data_alloc; +constexpr ompt_target_data_op_t H2D = ompt_target_data_transfer_to_device; +constexpr ompt_target_data_op_t D2H = ompt_target_data_transfer_from_device; +constexpr ompt_target_data_op_t DELETE = ompt_target_data_delete; +constexpr ompt_target_data_op_t ASSOCIATE = ompt_target_data_associate; +constexpr ompt_target_data_op_t DISASSOCIATE = ompt_target_data_disassociate; +constexpr ompt_target_data_op_t ALLOC_ASYNC = ompt_target_data_alloc_async; +constexpr ompt_target_data_op_t H2D_ASYNC = + ompt_target_data_transfer_to_device_async; +constexpr ompt_target_data_op_t D2H_ASYNC = + ompt_target_data_transfer_from_device_async; +constexpr ompt_target_data_op_t DELETE_ASYNC = ompt_target_data_delete_async; + +/// Aliases for enum: ompt_callbacks_t (partial) +constexpr ompt_callbacks_t CB_TARGET = ompt_callback_target; +constexpr ompt_callbacks_t CB_DATAOP = ompt_callback_target_data_op; +constexpr ompt_callbacks_t CB_KERNEL = ompt_callback_target_submit; + +/// Aliases for enum: ompt_work_t +constexpr ompt_work_t WORK_LOOP = ompt_work_loop; +constexpr ompt_work_t WORK_SECT = ompt_work_sections; +constexpr ompt_work_t WORK_EXEC = ompt_work_single_executor; +constexpr ompt_work_t WORK_SINGLE = ompt_work_single_other; +constexpr ompt_work_t WORK_SHARE = ompt_work_workshare; +constexpr ompt_work_t WORK_DIST = ompt_work_distribute; +constexpr ompt_work_t WORK_TASK = ompt_work_taskloop; +constexpr ompt_work_t WORK_SCOPE = ompt_work_scope; +constexpr ompt_work_t WORK_LOOP_STA = ompt_work_loop_static; +constexpr ompt_work_t WORK_LOOP_DYN = ompt_work_loop_dynamic; +constexpr ompt_work_t WORK_LOOP_GUI = ompt_work_loop_guided; +constexpr ompt_work_t WORK_LOOP_OTH = ompt_work_loop_other; + +/// Aliases for enum: ompt_sync_region_t +constexpr ompt_sync_region_t SR_BARRIER = ompt_sync_region_barrier; +constexpr ompt_sync_region_t SR_BARRIER_IMPL = + ompt_sync_region_barrier_implicit; +constexpr ompt_sync_region_t SR_BARRIER_EXPL = + 
ompt_sync_region_barrier_explicit; +constexpr ompt_sync_region_t SR_BARRIER_IMPLEMENTATION = + ompt_sync_region_barrier_implementation; +constexpr ompt_sync_region_t SR_TASKWAIT = ompt_sync_region_taskwait; +constexpr ompt_sync_region_t SR_TASKGROUP = ompt_sync_region_taskgroup; +constexpr ompt_sync_region_t SR_REDUCTION = ompt_sync_region_reduction; +constexpr ompt_sync_region_t SR_BARRIER_IMPL_WORKSHARE = + ompt_sync_region_barrier_implicit_workshare; +constexpr ompt_sync_region_t SR_BARRIER_IMPL_PARALLEL = + ompt_sync_region_barrier_implicit_parallel; +constexpr ompt_sync_region_t SR_BARRIER_TEAMS = ompt_sync_region_barrier_teams; + +#endif diff --git a/openmp/tools/omptest/include/OmptAssertEvent.h b/openmp/tools/omptest/include/OmptAssertEvent.h new file mode 100644 index 0000000000000..87d187c823796 --- /dev/null +++ b/openmp/tools/omptest/include/OmptAssertEvent.h @@ -0,0 +1,377 @@ +//===- OmptAssertEvent.h - Assertion event declarations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains assertion event constructors, for generally all observable events. +/// This includes user-generated events, like synchronization. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTEVENT_H + +#include "InternalEvent.h" +#include "omp-tools.h" + +#include +#include +#include +#include + +namespace omptest { + +enum class ObserveState { generated, always, never }; + +/// Helper function, returning an ObserveState string representation +const char *to_string(ObserveState State); + +/// Assertion event struct, provides statically callable CTORs. 
+struct OmptAssertEvent { + static OmptAssertEvent AssertionSyncPoint(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + const std::string &SyncPointName); + + static OmptAssertEvent AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType); + + static OmptAssertEvent ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads); + + static OmptAssertEvent ParallelEnd( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *EncounteringTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + uint64_t Count = expectedDefault(uint64_t), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_dispatch_t Kind = expectedDefault(ompt_dispatch_t), + ompt_data_t Instance = expectedDefault(ompt_data_t)); + + static OmptAssertEvent + TaskCreate(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t 
*EncounteringTaskData = expectedDefault(ompt_data_t *), + const ompt_frame_t *EncounteringTaskFrame = + expectedDefault(ompt_frame_t *), + ompt_data_t *NewTaskData = expectedDefault(ompt_data_t *), + int Flags = expectedDefault(int), + int HasDependences = expectedDefault(int), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent + ImplicitTask(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + unsigned int ActualParallelism = expectedDefault(unsigned int), + unsigned int Index = expectedDefault(unsigned int), + int Flags = expectedDefault(int)); + + static OmptAssertEvent + SyncRegion(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData = expectedDefault(ompt_data_t *), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(const void *)); + + static OmptAssertEvent + Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_data_t *TaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = 
expectedDefault(ompt_data_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_id_t TargetId, + ompt_id_t HostOpId, ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, size_t Bytes, + const void *CodeptrRA); + + static OmptAssertEvent + TargetDataOp(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + size_t Bytes = expectedDefault(size_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent + TargetDataOpEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, + ompt_id_t *HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, + int DstDeviceNum, size_t Bytes, const void *CodeptrRA); + + static OmptAssertEvent + TargetDataOpEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + ompt_scope_endpoint_t Endpoint, + size_t Bytes = expectedDefault(size_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_data_t *TargetTaskData = expectedDefault(ompt_data_t *), + ompt_data_t *TargetData = expectedDefault(ompt_data_t *), + ompt_id_t *HostOpId = expectedDefault(ompt_id_t *), + const void *CodeptrRA = expectedDefault(void *)); + + static OmptAssertEvent TargetSubmit(const std::string &Name, + const 
std::string &Group, + const ObserveState &Expected, + ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams); + + static OmptAssertEvent + TargetSubmit(const std::string &Name, const std::string &Group, + const ObserveState &Expected, unsigned int RequestedNumTeams, + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + static OmptAssertEvent + TargetSubmitEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams); + + static OmptAssertEvent + TargetSubmitEmi(const std::string &Name, const std::string &Group, + const ObserveState &Expected, unsigned int RequestedNumTeams, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData = expectedDefault(ompt_data_t *), + ompt_id_t *HostOpId = expectedDefault(ompt_id_t *)); + + static OmptAssertEvent ControlTool(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent DeviceInitialize( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Type = expectedDefault(const char *), + ompt_device_t *Device = expectedDefault(ompt_device_t *), + ompt_function_lookup_t LookupFn = expectedDefault(ompt_function_lookup_t), + const char *DocumentationStr = expectedDefault(const char *)); + + static OmptAssertEvent DeviceFinalize(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum); + + static OmptAssertEvent + DeviceLoad(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Filename = expectedDefault(const char *), + int64_t OffsetInFile = expectedDefault(int64_t), + void *VmaInFile = expectedDefault(void *), + size_t Bytes = expectedDefault(size_t), + void *HostAddr = expectedDefault(void *), + void 
*DeviceAddr = expectedDefault(void *), + uint64_t ModuleId = expectedDefault(int64_t)); + + static OmptAssertEvent DeviceUnload(const std::string &Name, + const std::string &Group, + const ObserveState &Expected); + + static OmptAssertEvent BufferRequest(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum, ompt_buffer_t **Buffer, + size_t *Bytes); + + static OmptAssertEvent + BufferComplete(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned); + + static OmptAssertEvent BufferRecord(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_record_ompt_t *Record); + + /// Handle type = ompt_record_target_t + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum = expectedDefault(int), + ompt_id_t TaskId = expectedDefault(ompt_id_t), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_data_op + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + std::pair Timeframe, + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_data_op + static OmptAssertEvent BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, 
ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes = expectedDefault(size_t), + ompt_device_time_t MinimumTimeDelta = expectedDefault(ompt_device_time_t), + void *SrcAddr = expectedDefault(void *), + void *DstAddr = expectedDefault(void *), + int SrcDeviceNum = expectedDefault(int), + int DstDeviceNum = expectedDefault(int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t), + const void *CodeptrRA = expectedDefault(void *)); + + /// Handle type = ompt_callback_target_submit + static OmptAssertEvent + BufferRecord(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + std::pair Timeframe, + unsigned int RequestedNumTeams = expectedDefault(unsigned int), + unsigned int GrantedNumTeams = expectedDefault(unsigned int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + /// Handle type = ompt_callback_target_submit + /// Note: This will also act as the simplest default CTOR + static OmptAssertEvent BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_device_time_t MinimumTimeDelta = expectedDefault(ompt_device_time_t), + unsigned int RequestedNumTeams = expectedDefault(unsigned int), + unsigned int GrantedNumTeams = expectedDefault(unsigned int), + ompt_id_t TargetId = expectedDefault(ompt_id_t), + ompt_id_t HostOpId = expectedDefault(ompt_id_t)); + + static OmptAssertEvent BufferRecordDeallocation(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_buffer_t *Buffer); + + /// Allow move construction (due to std::unique_ptr) + OmptAssertEvent(OmptAssertEvent &&o) = default; + OmptAssertEvent &operator=(OmptAssertEvent &&o) = default; + + /// Get the event's name + std::string getEventName() const; + + /// Get the event's group name + std::string getEventGroup() const; + + /// Get 
the event's expected observation state + ObserveState getEventExpectedState() const; + + /// Return the actual event type enum value + internal::EventTy getEventType() const; + + /// Get a pointer to the internal event + internal::InternalEvent *getEvent() const; + + /// Make events comparable + friend bool operator==(const OmptAssertEvent &A, const OmptAssertEvent &B); + + /// Returns the string representation of the event + std::string toString(bool PrefixEventName = false) const; + +private: + OmptAssertEvent(const std::string &Name, const std::string &Group, + const ObserveState &Expected, internal::InternalEvent *IE); + OmptAssertEvent(const OmptAssertEvent &o) = delete; + + /// Determine the event name. Either it is provided directly or determined + /// from the calling function's name. + static std::string getName(const std::string &Name, + const char *Caller = __builtin_FUNCTION()) { + std::string EName = Name; + if (EName.empty()) + EName.append(Caller).append(" (auto generated)"); + + return EName; + } + + /// Determine the event group. Either it is provided directly or "default". + static std::string getGroup(const std::string &Group) { + if (Group.empty()) + return "default"; + + return Group; + } + + std::string Name; + std::string Group; + ObserveState ExpectedState; + std::unique_ptr TheEvent; +}; + +/// POD type, which holds the target region id, corresponding to an event group. +struct AssertEventGroup { + AssertEventGroup(uint64_t TargetRegion) : TargetRegion(TargetRegion) {} + uint64_t TargetRegion; +}; + +bool operator==(const OmptAssertEvent &A, const OmptAssertEvent &B); + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/OmptAsserter.h b/openmp/tools/omptest/include/OmptAsserter.h new file mode 100644 index 0000000000000..444116f2143be --- /dev/null +++ b/openmp/tools/omptest/include/OmptAsserter.h @@ -0,0 +1,291 @@ +//===- OmptAsserter.h - Asserter-related classes, enums, etc.
---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains all asserter-related class declarations and important enums. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTASSERTER_H + +#include "Logging.h" +#include "OmptAssertEvent.h" + +#include +#include +#include +#include +#include +#include + +namespace omptest { + +// Forward declaration. +class OmptEventGroupInterface; + +enum class AssertMode { strict, relaxed }; +enum class AssertState { pass, fail }; + +/// General base class for the subscriber/notification pattern in +/// OmptCallbackHandler. Derived classes need to implement the notify method. +class OmptListener { +public: + virtual ~OmptListener() = default; + + /// Called for each registered OMPT event of the OmptCallbackHandler + virtual void notify(omptest::OmptAssertEvent &&AE) = 0; + + /// Control whether this asserter should be considered 'active'. + void setActive(bool Enabled); + + /// Check if this asserter is considered 'active'. + bool isActive(); + + /// Check if the given event type is in the set of suppressed event types. + bool isSuppressedEventType(omptest::internal::EventTy EvTy); + + /// Remove the given event type from the set of suppressed events. + void permitEvent(omptest::internal::EventTy EvTy); + + /// Add the given event type to the set of suppressed events. + void suppressEvent(omptest::internal::EventTy EvTy); + +private: + bool Active{true}; + + // Add event types to the set of suppressed events by default.
+ std::set SuppressedEvents{ + omptest::internal::EventTy::ThreadBegin, + omptest::internal::EventTy::ThreadEnd, + omptest::internal::EventTy::ParallelBegin, + omptest::internal::EventTy::ParallelEnd, + omptest::internal::EventTy::Work, + omptest::internal::EventTy::Dispatch, + omptest::internal::EventTy::TaskCreate, + omptest::internal::EventTy::Dependences, + omptest::internal::EventTy::TaskDependence, + omptest::internal::EventTy::TaskSchedule, + omptest::internal::EventTy::ImplicitTask, + omptest::internal::EventTy::Masked, + omptest::internal::EventTy::SyncRegion, + omptest::internal::EventTy::MutexAcquire, + omptest::internal::EventTy::Mutex, + omptest::internal::EventTy::NestLock, + omptest::internal::EventTy::Flush, + omptest::internal::EventTy::Cancel}; +}; + +/// Base class for asserting on OMPT events +class OmptAsserter : public OmptListener { +public: + OmptAsserter(); + virtual ~OmptAsserter() = default; + + /// Add an event to the asserter's internal data structure. + virtual void insert(omptest::OmptAssertEvent &&AE); + + /// Called from the CallbackHandler with a corresponding AssertEvent to which + /// callback was handled. + void notify(omptest::OmptAssertEvent &&AE) override; + + /// Implemented in subclasses to implement what should actually be done with + /// the notification. + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) = 0; + + /// Get the number of currently remaining events, with: ObserveState::always. + virtual size_t getRemainingEventCount() = 0; + + /// Get the total number of received, effective notifications. + int getNotificationCount() { return NumNotifications; } + + /// Get the total number of successful assertion checks. + int getSuccessfulAssertionCount() { return NumSuccessfulAsserts; } + + /// Get the asserter's current operation mode, e.g. strict or relaxed. + AssertMode getOperationMode() { return OperationMode; } + + /// Return the asserter's current state.
+ omptest::AssertState getState() { return State; } + + /// Determine and return the asserter's state. + virtual omptest::AssertState checkState(); + + /// Accessor for the event group interface. + std::shared_ptr getEventGroups() const { + return EventGroups; + } + + /// Accessor for the logging instance. + std::shared_ptr getLog() const { return Log; } + + /// Check the observed events' group association. If the event indicates the + /// begin/end of an OpenMP target region, we will create/deprecate the + /// expected event's group. Return true if the expected event group exists + /// (and is active), otherwise: false. Note: BufferRecords may also match with + /// deprecated groups as they may be delivered asynchronously. + bool verifyEventGroups(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent); + + /// Set the asserter's mode of operation w.r.t. assertion. + void setOperationMode(AssertMode Mode); + +protected: + /// The asserter's current state. + omptest::AssertState State{omptest::AssertState::pass}; + + /// Mutex to avoid data races w.r.t. event notifications and/or insertions. + std::mutex AssertMutex; + + /// Pointer to the OmptEventGroupInterface. + std::shared_ptr EventGroups{nullptr}; + + /// Pointer to the logging instance. + std::shared_ptr Log{nullptr}; + + /// Operation mode during assertion / notification. + AssertMode OperationMode{AssertMode::strict}; + + /// The total number of effective notifications. For example, if specific + /// notifications are to be ignored, they will not count towards this total. + int NumNotifications{0}; + + /// The number of successful assertion checks.
+ int NumSuccessfulAsserts{0}; + +private: + /// Mutex for creating/accessing the singleton members + static std::mutex StaticMemberAccessMutex; + + /// Static member to manage the singleton event group interface instance + static std::weak_ptr EventGroupInterfaceInstance; + + /// Static member to manage the singleton logging instance + static std::weak_ptr LoggingInstance; +}; + +/// Class that can assert in a sequenced fashion, i.e., events have to occur in +/// the order they were registered +class OmptSequencedAsserter : public OmptAsserter { +public: + OmptSequencedAsserter() : OmptAsserter(), NextEvent(0) {} + + /// Add the event to the in-sequence set of events that the asserter should + /// check for. + void insert(omptest::OmptAssertEvent &&AE) override; + + /// Implements the asserter's actual logic + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) override; + + size_t getRemainingEventCount() override; + + omptest::AssertState checkState() override; + + bool AssertionSuspended{false}; + +protected: + /// Notification helper function, implementing SyncPoint logic. Returns true + /// in case of consumed event, indicating early exit of notification. + bool consumeSyncPoint(const omptest::OmptAssertEvent &AE); + + /// Notification helper function, implementing excess event notification + /// logic. Returns true when no more events were expected, indicating early + /// exit of notification. + bool checkExcessNotify(const omptest::OmptAssertEvent &AE); + + /// Notification helper function, implementing Suspend logic. Returns true + /// in case of consumed event, indicating early exit of notification. + bool consumeSuspend(); + + /// Notification helper function, implementing regular event notification + /// logic. Returns true when a matching event was encountered, indicating + /// early exit of notification. + bool consumeRegularEvent(const omptest::OmptAssertEvent &AE); + +public: + /// Index of the next, expected event. 
+ size_t NextEvent{0}; + std::vector Events{}; +}; + +/// Class that asserts with set semantics, i.e., unordered +struct OmptEventAsserter : public OmptAsserter { + OmptEventAsserter() : OmptAsserter(), NumEvents(0), Events() {} + + /// Add the event to the set of events that the asserter should check for. + void insert(omptest::OmptAssertEvent &&AE) override; + + /// Implements the asserter's logic + virtual void notifyImpl(omptest::OmptAssertEvent &&AE) override; + + size_t getRemainingEventCount() override; + + omptest::AssertState checkState() override; + + size_t NumEvents{0}; + + /// For now use vector (but do set semantics) + // TODO std::unordered_set? + std::vector Events{}; +}; + +/// Class that reports the occurred events +class OmptEventReporter : public OmptListener { +public: + OmptEventReporter(std::ostream &OutStream = std::cout) + : OutStream(OutStream) {} + + /// Called from the CallbackHandler with a corresponding AssertEvent to which + /// callback was handled. + void notify(omptest::OmptAssertEvent &&AE) override; + +private: + std::ostream &OutStream; +}; + +/// This class provides the members and methods to manage event groups and +/// SyncPoints in conjunction with asserters. Most importantly it maintains a +/// coherent view of active and past events or SyncPoints. +class OmptEventGroupInterface { +public: + OmptEventGroupInterface() = default; + ~OmptEventGroupInterface() = default; + + /// Non-copyable and non-movable + OmptEventGroupInterface(const OmptEventGroupInterface &) = delete; + OmptEventGroupInterface &operator=(const OmptEventGroupInterface &) = delete; + OmptEventGroupInterface(OmptEventGroupInterface &&) = delete; + OmptEventGroupInterface &operator=(OmptEventGroupInterface &&) = delete; + + /// Add given group to the set of active event groups. Effectively connecting + /// the given groupname (expected) with a target region id (observed). 
+ bool addActiveEventGroup(const std::string &GroupName, + omptest::AssertEventGroup Group); + + /// Move given group from the set of active event groups to the set of + /// previously active event groups. + bool deprecateActiveEventGroup(const std::string &GroupName); + + /// Check if given group is currently part of the active event groups. + bool checkActiveEventGroups(const std::string &GroupName, + omptest::AssertEventGroup Group); + + /// Check if given group is currently part of the deprecated event groups. + bool checkDeprecatedEventGroups(const std::string &GroupName, + omptest::AssertEventGroup Group); + +private: + mutable std::mutex GroupMutex; + std::map ActiveEventGroups{}; + std::map DeprecatedEventGroups{}; + std::set EncounteredSyncPoints{}; +}; + +} // namespace omptest + +#endif diff --git a/openmp/tools/omptest/include/OmptCallbackHandler.h b/openmp/tools/omptest/include/OmptCallbackHandler.h new file mode 100644 index 0000000000000..40076c386107e --- /dev/null +++ b/openmp/tools/omptest/include/OmptCallbackHandler.h @@ -0,0 +1,165 @@ +//===- OmptCallbackHandler.h - Callback reception and handling --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides the OMPT callback handling declarations. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTCALLBACKHANDLER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTCALLBACKHANDLER_H + +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" + +#include "omp-tools.h" + +#include + +namespace omptest { + +/// Handler class to do whatever is needed to be done when a callback is invoked +/// by the OMP runtime +/// Supports a RecordAndReplay mechanism in which all OMPT events are recorded +/// and then replayed. This is so that a test can assert on, e.g., a device +/// initialize event, even though this would occur before a unit test is +/// actually executed. +class OmptCallbackHandler { +public: + ~OmptCallbackHandler() = default; + + /// Singleton handler + static OmptCallbackHandler &get(); + + /// Subscribe a listener to be notified for OMPT events + void subscribe(OmptListener *Listener); + + /// Remove all subscribers + void clearSubscribers(); + + /// When the record and replay mechanism is enabled this replays all OMPT + /// events + void replay(); + + /// Special asserter callback which checks that upon encountering the + /// synchronization point, all expected events have been processed. That is: + /// there are currently no remaining expected events for any asserter. 
+ void handleAssertionSyncPoint(const std::string &SyncPointName); + + void handleThreadBegin(ompt_thread_t ThreadType, ompt_data_t *ThreadData); + + void handleThreadEnd(ompt_data_t *ThreadData); + + void handleTaskCreate(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *NewTaskData, int Flags, int HasDependences, + const void *CodeptrRA); + + void handleTaskSchedule(ompt_data_t *PriorTaskData, + ompt_task_status_t PriorTaskStatus, + ompt_data_t *NextTaskData); + + void handleImplicitTask(ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + unsigned int ActualParallelism, unsigned int Index, + int Flags); + + void handleParallelBegin(ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, + ompt_data_t *ParallelData, + unsigned int RequestedParallelism, int Flags, + const void *CodeptrRA); + + void handleParallelEnd(ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, int Flags, + const void *CodeptrRA); + + void handleDeviceInitialize(int DeviceNum, const char *Type, + ompt_device_t *Device, + ompt_function_lookup_t LookupFn, + const char *DocumentationStr); + + void handleDeviceFinalize(int DeviceNum); + + void handleTarget(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, ompt_id_t TargetId, + const void *CodeptrRA); + + void handleTargetEmi(ompt_target_t Kind, ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, + const void *CodeptrRA); + + void handleTargetSubmit(ompt_id_t TargetId, ompt_id_t HostOpId, + unsigned int RequestedNumTeams); + + void handleTargetSubmitEmi(ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams); + + void handleTargetDataOp(ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int 
DstDeviceNum, + size_t Bytes, const void *CodeptrRA); + + void handleTargetDataOpEmi(ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, + int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA); + + void handleDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, uint64_t ModuleId); + + void handleDeviceUnload(int DeviceNum, uint64_t ModuleId); + + void handleBufferRequest(int DeviceNum, ompt_buffer_t **Buffer, + size_t *Bytes); + + void handleBufferComplete(int DeviceNum, ompt_buffer_t *Buffer, size_t Bytes, + ompt_buffer_cursor_t Begin, int BufferOwned); + + void handleBufferRecord(ompt_record_ompt_t *Record); + + void handleBufferRecordDeallocation(ompt_buffer_t *Buffer); + + /// Not needed for a conforming minimal OMPT implementation + void handleWork(ompt_work_t WorkType, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + uint64_t Count, const void *CodeptrRA); + + void handleDispatch(ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance); + + void handleSyncRegion(ompt_sync_region_t Kind, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + const void *CodeptrRA); + +private: + /// Wrapper around emplace_back for potential additional logging / checking or + /// so + void recordEvent(OmptAssertEvent &&Event); + + /// Listeners to be notified + std::vector Subscribers; + + /// Toggle if OMPT events should notify subscribers immediately or not + bool RecordAndReplay{false}; + + /// Recorded events in Record and Replay mode + std::vector RecordedEvents; +}; + +} // namespace omptest + +// Pointer to global callback handler +extern omptest::OmptCallbackHandler *Handler; + +#endif diff --git a/openmp/tools/omptest/include/OmptTester.h 
b/openmp/tools/omptest/include/OmptTester.h new file mode 100644 index 0000000000000..155e61d5f7482 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTester.h @@ -0,0 +1,60 @@ +//===- OmptTester.h - Main header for ompTest usage -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the main header file for usage of the ompTest library. +/// Depending on the build either 'standalone' or GoogleTest headers are +/// included and corresponding main-function macros are defined. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTER_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTER_H + +#include "AssertMacros.h" +#include "Logging.h" +#include "OmptAliases.h" +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptCallbackHandler.h" + +#include +#include +#include +#include +#include +#include +#include + +// Standalone header section +#ifdef OPENMP_LIBOMPTEST_BUILD_STANDALONE + +#include "OmptTesterStandalone.h" + +// Define standalone main function (place once at the bottom of a testsuite) +#define OMPTEST_TESTSUITE_MAIN() \ + int main(int argc, char **argv) { \ + Runner R; \ + return R.run(); \ + } + +// GoogleTest header section +#else + +#include "OmptTesterGoogleTest.h" + +// Define GoogleTest main function (place once at the bottom of a testsuite) +#define OMPTEST_TESTSUITE_MAIN() \ + int main(int argc, char **argv) { \ + testing::InitGoogleTest(&argc, argv); \ + return RUN_ALL_TESTS(); \ + } + +#endif + +#endif diff --git a/openmp/tools/omptest/include/OmptTesterGlobals.h b/openmp/tools/omptest/include/OmptTesterGlobals.h new file mode 100644 index 
0000000000000..62f443aed80e0 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterGlobals.h @@ -0,0 +1,36 @@ +//===- OmptTesterGlobals.h - Global function declarations -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains global function declarations, esp. for OMPT symbols. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGLOBALS_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGLOBALS_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version); +int start_trace(ompt_device_t *Device); +int flush_trace(ompt_device_t *Device); +// Function which calls flush_trace(ompt_device_t *) on all traced devices. +int flush_traced_devices(); +int stop_trace(ompt_device_t *Device); +// Function which calls stop_trace(ompt_device_t *) on all traced devices. +int stop_trace_devices(); +void libomptest_global_eventreporter_set_active(bool State); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/openmp/tools/omptest/include/OmptTesterGoogleTest.h b/openmp/tools/omptest/include/OmptTesterGoogleTest.h new file mode 100644 index 0000000000000..51b94bc678f50 --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterGoogleTest.h @@ -0,0 +1,86 @@ +//===- OmptTesterGoogleTest.h - GoogleTest header variant -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the GoogleTest-based header variant, defining the +/// actual test classes and their behavior. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGOOGLETEST_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERGOOGLETEST_H + +#include "AssertMacros.h" +#include "OmptAliases.h" +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptCallbackHandler.h" +#include "OmptTesterGlobals.h" + +// This will allow us to override the "TEST" macro of gtest +#define GTEST_DONT_DEFINE_TEST 1 +#include "gtest/gtest.h" + +namespace testing { +class GTEST_API_ OmptTestCase : public testing::Test, + public omptest::OmptEventGroupInterface { +public: + std::unique_ptr SequenceAsserter = + std::make_unique(); + std::unique_ptr SetAsserter = + std::make_unique(); + std::unique_ptr EventReporter = + std::make_unique(); + +protected: + void SetUp() override { + omptest::OmptCallbackHandler::get().subscribe(SequenceAsserter.get()); + omptest::OmptCallbackHandler::get().subscribe(SetAsserter.get()); + omptest::OmptCallbackHandler::get().subscribe(EventReporter.get()); + } + + void TearDown() override { + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // Remove subscribers to not be notified of events after test execution. + omptest::OmptCallbackHandler::get().clearSubscribers(); + + // This common testcase must not encounter any failures. 
+ if (SequenceAsserter->checkState() == omptest::AssertState::fail || + SetAsserter->checkState() == omptest::AssertState::fail) + ADD_FAILURE(); + } +}; + +class GTEST_API_ OmptTestCaseXFail : public testing::OmptTestCase { +protected: + void TearDown() override { + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // Remove subscribers to not be notified of events after test execution. + omptest::OmptCallbackHandler::get().clearSubscribers(); + + // This eXpectedly failing testcase has to encounter at least one failure. + if (SequenceAsserter->checkState() == omptest::AssertState::pass && + SetAsserter->checkState() == omptest::AssertState::pass) + ADD_FAILURE(); + } +}; +} // namespace testing + +#define TEST(test_suite_name, test_name) \ + GTEST_TEST_(test_suite_name, test_name, ::testing::OmptTestCase, \ + ::testing::internal::GetTypeId<::testing::OmptTestCase>()) + +#define TEST_XFAIL(test_suite_name, test_name) \ + GTEST_TEST_(test_suite_name, test_name, ::testing::OmptTestCaseXFail, \ + ::testing::internal::GetTypeId<::testing::OmptTestCaseXFail>()) + +#endif // include guard diff --git a/openmp/tools/omptest/include/OmptTesterStandalone.h b/openmp/tools/omptest/include/OmptTesterStandalone.h new file mode 100644 index 0000000000000..06649031c5d1c --- /dev/null +++ b/openmp/tools/omptest/include/OmptTesterStandalone.h @@ -0,0 +1,123 @@ +//===- OmptTesterStandalone.h - Standalone header variant -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the 'standalone' header variant, defining the actual +/// test classes and their behavior (it does not have external dependencies). 
+/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERSTANDALONE_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_OMPTTESTERSTANDALONE_H + +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptTesterGlobals.h" + +#include +#include + +// Forward declarations. +namespace omptest { +struct OmptEventAsserter; +class OmptEventReporter; +class OmptSequencedAsserter; +} // namespace omptest + +struct Error { + operator bool() { return Fail; } + bool Fail; +}; + +/// A pretty crude test case abstraction +struct TestCase { + TestCase(const std::string &name) + : IsDisabled(name.rfind("DISABLED_", 0) == 0), Name(name) {} + TestCase(const std::string &name, const omptest::AssertState &expected) + : IsDisabled(name.rfind("DISABLED_", 0) == 0), Name(name), + ExpectedState(expected) {} + virtual ~TestCase() = default; + Error exec(); + virtual void execImpl() { assert(false && "Allocating base class"); } + + bool IsDisabled{false}; + std::string Name; + omptest::AssertState ExpectedState{omptest::AssertState::pass}; + omptest::AssertState ResultState{omptest::AssertState::pass}; + + std::unique_ptr SequenceAsserter = + std::make_unique(); + std::unique_ptr SetAsserter = + std::make_unique(); + std::unique_ptr EventReporter = + std::make_unique(); +}; +/// A pretty crude test suite abstraction +struct TestSuite { + using TestCaseVec = std::vector>; + std::string Name; + TestSuite() = default; + TestSuite(const TestSuite &O) = delete; + TestSuite(TestSuite &&O); + void setup(); + void teardown(); + TestCaseVec::iterator begin(); + TestCaseVec::iterator end(); + TestCaseVec TestCases; +}; +/// Static class used to register all test cases and provide them to the driver +class TestRegistrar { +public: + static TestRegistrar &get(); + static std::vector getTestSuites(); + static void addCaseToSuite(TestCase *TC, std::string TSName); + +private: + TestRegistrar() = default; + 
TestRegistrar(const TestRegistrar &o) = delete; + TestRegistrar operator=(const TestRegistrar &o) = delete; + // Keep tests in order 'of appearance' (top -> bottom), avoid unordered_map + static std::map Tests; +}; +/// Hack to register test cases +struct Registerer { + Registerer(TestCase *TC, const std::string SuiteName); +}; +/// Eventually executes all test suites and cases, should contain logic to skip +/// stuff if needed +struct Runner { + Runner() : TestSuites(TestRegistrar::get().getTestSuites()) {} + int run(); + void reportError(const Error &Err); + void abortOrKeepGoing(); + // Print an execution summary of all testsuites and their corresponding + // testcases. + void printSummary(); + std::vector TestSuites; +}; + +/// MACROS TO DEFINE A TESTSUITE + TESTCASE (like GoogleTest does) +#define XQUOTE(str) QUOTE(str) +#define QUOTE(str) #str + +#define TEST_TEMPLATE(SuiteName, CaseName, ExpectedState) \ + struct SuiteName##_##CaseName : public TestCase { \ + SuiteName##_##CaseName() \ + : TestCase(XQUOTE(CaseName), omptest::AssertState::ExpectedState) {} \ + virtual void execImpl() override; \ + }; \ + static Registerer R_##SuiteName##CaseName(new SuiteName##_##CaseName(), \ + #SuiteName); \ + void SuiteName##_##CaseName::execImpl() + +#define TEST(SuiteName, CaseName) \ + TEST_TEMPLATE(SuiteName, CaseName, /*ExpectedState=*/pass) +#define TEST_XFAIL(SuiteName, CaseName) \ + TEST_TEMPLATE(SuiteName, CaseName, /*ExpectedState=*/fail) + +#endif diff --git a/openmp/tools/omptest/src/InternalEvent.cpp b/openmp/tools/omptest/src/InternalEvent.cpp new file mode 100644 index 0000000000000..87daf5a6a31ba --- /dev/null +++ b/openmp/tools/omptest/src/InternalEvent.cpp @@ -0,0 +1,367 @@ +//===- InternalEvent.cpp - Internal event implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements internal event representation methods and helper functions. +/// +//===----------------------------------------------------------------------===// + +#include "InternalEvent.h" + +#include +#include + +using namespace omptest; +using namespace util; + +std::string util::makeHexString(uint64_t Data, bool IsPointer, size_t MinBytes, + bool ShowHexBase) { + if (Data == 0 && IsPointer) + return "(nil)"; + + thread_local std::ostringstream os; + // Clear the content of the stream + os.str(std::string()); + + // Manually prefixing "0x" will make the use of std::setfill more easy + if (ShowHexBase) + os << "0x"; + + // Default to 32bit (8 hex digits) width, if exceeding 64bit or zero value + size_t NumDigits = (MinBytes > 0 && MinBytes < 9) ? (MinBytes << 1) : 8; + + if (MinBytes > 0) + os << std::setfill('0') << std::setw(NumDigits); + + os << std::hex << Data; + return os.str(); +} + +std::string internal::AssertionSyncPoint::toString() const { + std::string S{"Assertion SyncPoint: '"}; + S.append(Name).append(1, '\''); + return S; +} + +std::string internal::ThreadBegin::toString() const { + std::string S{"OMPT Callback ThreadBegin: "}; + S.append("ThreadType=").append(std::to_string(ThreadType)); + return S; +} + +std::string internal::ThreadEnd::toString() const { + std::string S{"OMPT Callback ThreadEnd"}; + return S; +} + +std::string internal::ParallelBegin::toString() const { + std::string S{"OMPT Callback ParallelBegin: "}; + S.append("NumThreads=").append(std::to_string(NumThreads)); + return S; +} + +std::string internal::ParallelEnd::toString() const { + // TODO: Should we expose more detailed info here? 
+ std::string S{"OMPT Callback ParallelEnd"}; + return S; +} + +std::string internal::Work::toString() const { + std::string S{"OMPT Callback Work: "}; + S.append("work_type=").append(std::to_string(WorkType)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" count=").append(std::to_string(Count)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::Dispatch::toString() const { + std::string S{"OMPT Callback Dispatch: "}; + S.append("parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" kind=").append(std::to_string(Kind)); + // TODO Check what to print for instance in all different cases + if (Kind == ompt_dispatch_iteration) { + S.append(" instance=[it=") + .append(std::to_string(Instance.value)) + .append(1, ']'); + } else if (Kind == ompt_dispatch_section) { + S.append(" instance=[ptr=") + .append(makeHexString((uint64_t)Instance.ptr)) + .append(1, ']'); + } else if ((Kind == ompt_dispatch_ws_loop_chunk || + Kind == ompt_dispatch_taskloop_chunk || + Kind == ompt_dispatch_distribute_chunk) && + Instance.ptr != nullptr) { + auto Chunk = static_cast(Instance.ptr); + S.append(" instance=[chunk=(start=") + .append(std::to_string(Chunk->start)) + .append(", iterations=") + .append(std::to_string(Chunk->iterations)) + .append(")]"); + } + return S; +} + +std::string internal::TaskCreate::toString() const { + std::string S{"OMPT Callback TaskCreate: "}; + S.append("encountering_task_data=") + .append(makeHexString((uint64_t)EncounteringTaskData)); + S.append(" encountering_task_frame=") + .append(makeHexString((uint64_t)EncounteringTaskFrame)); + S.append(" new_task_data=").append(makeHexString((uint64_t)NewTaskData)); + S.append(" 
flags=").append(std::to_string(Flags)); + S.append(" has_dependences=").append(std::to_string(HasDependences)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::ImplicitTask::toString() const { + std::string S{"OMPT Callback ImplicitTask: "}; + S.append("endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" actual_parallelism=").append(std::to_string(ActualParallelism)); + S.append(" index=").append(std::to_string(Index)); + S.append(" flags=").append(std::to_string(Flags)); + return S; +} + +std::string internal::SyncRegion::toString() const { + std::string S{"OMPT Callback SyncRegion: "}; + S.append("kind=").append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" parallel_data=").append(makeHexString((uint64_t)ParallelData)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" codeptr=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::Target::toString() const { + // TODO Should we canonicalize the string prefix (use "OMPT ..." everywhere)? + std::string S{"Callback Target: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" kind=").append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" device_num=").append(std::to_string(DeviceNum)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetEmi::toString() const { + // TODO Should we canonicalize the string prefix (use "OMPT ..." everywhere)? 
+ std::string S{"Callback Target EMI: kind="}; + S.append(std::to_string(Kind)); + S.append(" endpoint=").append(std::to_string(Endpoint)); + S.append(" device_num=").append(std::to_string(DeviceNum)); + S.append(" task_data=").append(makeHexString((uint64_t)TaskData)); + S.append(" (") + .append(makeHexString((uint64_t)(TaskData) ? TaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_task_data=") + .append(makeHexString((uint64_t)TargetTaskData)); + S.append(" (") + .append( + makeHexString((uint64_t)(TargetTaskData) ? TargetTaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetDataOp::toString() const { + std::string S{" Callback DataOp: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" host_op_id=").append(std::to_string(HostOpId)); + S.append(" optype=").append(std::to_string(OpType)); + S.append(" src=").append(makeHexString((uint64_t)SrcAddr)); + S.append(" src_device_num=").append(std::to_string(SrcDeviceNum)); + S.append(" dest=").append(makeHexString((uint64_t)DstAddr)); + S.append(" dest_device_num=").append(std::to_string(DstDeviceNum)); + S.append(" bytes=").append(std::to_string(Bytes)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetDataOpEmi::toString() const { + std::string S{" Callback DataOp EMI: endpoint="}; + S.append(std::to_string(Endpoint)); + S.append(" optype=").append(std::to_string(OpType)); + S.append(" target_task_data=") + .append(makeHexString((uint64_t)TargetTaskData)); + S.append(" (") + .append( + makeHexString((uint64_t)(TargetTaskData) ? 
TargetTaskData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" host_op_id=").append(makeHexString((uint64_t)HostOpId)); + S.append(" (") + .append(makeHexString((uint64_t)(HostOpId) ? (*HostOpId) : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" src=").append(makeHexString((uint64_t)SrcAddr)); + S.append(" src_device_num=").append(std::to_string(SrcDeviceNum)); + S.append(" dest=").append(makeHexString((uint64_t)DstAddr)); + S.append(" dest_device_num=").append(std::to_string(DstDeviceNum)); + S.append(" bytes=").append(std::to_string(Bytes)); + S.append(" code=").append(makeHexString((uint64_t)CodeptrRA)); + return S; +} + +std::string internal::TargetSubmit::toString() const { + std::string S{" Callback Submit: target_id="}; + S.append(std::to_string(TargetId)); + S.append(" host_op_id=").append(std::to_string(HostOpId)); + S.append(" req_num_teams=").append(std::to_string(RequestedNumTeams)); + return S; +} + +std::string internal::TargetSubmitEmi::toString() const { + std::string S{" Callback Submit EMI: endpoint="}; + S.append(std::to_string(Endpoint)); + S.append(" req_num_teams=").append(std::to_string(RequestedNumTeams)); + S.append(" target_data=").append(makeHexString((uint64_t)TargetData)); + S.append(" (") + .append(makeHexString((uint64_t)(TargetData) ? TargetData->value : 0, + /*IsPointer=*/false)) + .append(1, ')'); + S.append(" host_op_id=").append(makeHexString((uint64_t)HostOpId)); + S.append(" (") + .append(makeHexString((uint64_t)(HostOpId) ? (*HostOpId) : 0, + /*IsPointer=*/false)) + .append(1, ')'); + return S; +} + +std::string internal::DeviceInitialize::toString() const { + std::string S{"Callback Init: device_num="}; + S.append(std::to_string(DeviceNum)); + S.append(" type=").append((Type) ? 
Type : "(null)"); + S.append(" device=").append(makeHexString((uint64_t)Device)); + S.append(" lookup=").append(makeHexString((uint64_t)LookupFn)); + S.append(" doc=").append(makeHexString((uint64_t)DocStr)); + return S; +} + +std::string internal::DeviceFinalize::toString() const { + std::string S{"Callback Fini: device_num="}; + S.append(std::to_string(DeviceNum)); + return S; +} + +std::string internal::DeviceLoad::toString() const { + std::string S{"Callback Load: device_num:"}; + S.append(std::to_string(DeviceNum)); + S.append(" module_id:").append(std::to_string(ModuleId)); + S.append(" filename:").append((Filename == nullptr) ? "(null)" : Filename); + S.append(" host_adddr:").append(makeHexString((uint64_t)HostAddr)); + S.append(" device_addr:").append(makeHexString((uint64_t)DeviceAddr)); + S.append(" bytes:").append(std::to_string(Bytes)); + return S; +} + +std::string internal::BufferRequest::toString() const { + std::string S{"Allocated "}; + S.append(std::to_string((Bytes != nullptr) ? *Bytes : 0)) + .append(" bytes at "); + S.append(makeHexString((Buffer != nullptr) ? 
(uint64_t)*Buffer : 0)); + S.append(" in buffer request callback"); + return S; +} + +std::string internal::BufferComplete::toString() const { + std::string S{"Executing buffer complete callback: "}; + S.append(std::to_string(DeviceNum)).append(1, ' '); + S.append(makeHexString((uint64_t)Buffer)).append(1, ' '); + S.append(std::to_string(Bytes)).append(1, ' '); + S.append(makeHexString((uint64_t)Begin)).append(1, ' '); + S.append(std::to_string(BufferOwned)); + return S; +} + +std::string internal::BufferRecord::toString() const { + std::string S{""}; + std::string T{""}; + S.append("rec=").append(makeHexString((uint64_t)RecordPtr)); + S.append(" type=").append(std::to_string(Record.type)); + + T.append("time=").append(std::to_string(Record.time)); + T.append(" thread_id=").append(std::to_string(Record.thread_id)); + T.append(" target_id=").append(std::to_string(Record.target_id)); + + switch (Record.type) { + case ompt_callback_target: + case ompt_callback_target_emi: { + // Handle Target Record + ompt_record_target_t TR = Record.record.target; + S.append(" (Target task) ").append(T); + S.append(" kind=").append(std::to_string(TR.kind)); + S.append(" endpoint=").append(std::to_string(TR.endpoint)); + S.append(" device=").append(std::to_string(TR.device_num)); + S.append(" task_id=").append(std::to_string(TR.task_id)); + S.append(" codeptr=").append(makeHexString((uint64_t)TR.codeptr_ra)); + break; + } + case ompt_callback_target_data_op: + case ompt_callback_target_data_op_emi: { + // Handle Target DataOp Record + ompt_record_target_data_op_t TDR = Record.record.target_data_op; + S.append(" (Target data op) ").append(T); + S.append(" host_op_id=").append(std::to_string(TDR.host_op_id)); + S.append(" optype=").append(std::to_string(TDR.optype)); + S.append(" src_addr=").append(makeHexString((uint64_t)TDR.src_addr)); + S.append(" src_device=").append(std::to_string(TDR.src_device_num)); + S.append(" dest_addr=").append(makeHexString((uint64_t)TDR.dest_addr)); + 
S.append(" dest_device=").append(std::to_string(TDR.dest_device_num)); + S.append(" bytes=").append(std::to_string(TDR.bytes)); + S.append(" end_time=").append(std::to_string(TDR.end_time)); + S.append(" duration=").append(std::to_string(TDR.end_time - Record.time)); + S.append(" ns codeptr=").append(makeHexString((uint64_t)TDR.codeptr_ra)); + break; + } + case ompt_callback_target_submit: + case ompt_callback_target_submit_emi: { + // Handle Target Kernel Record + ompt_record_target_kernel_t TKR = Record.record.target_kernel; + S.append(" (Target kernel) ").append(T); + S.append(" host_op_id=").append(std::to_string(TKR.host_op_id)); + S.append(" requested_num_teams=") + .append(std::to_string(TKR.requested_num_teams)); + S.append(" granted_num_teams=") + .append(std::to_string(TKR.granted_num_teams)); + S.append(" end_time=").append(std::to_string(TKR.end_time)); + S.append(" duration=").append(std::to_string(TKR.end_time - Record.time)); + S.append(" ns"); + break; + } + default: + S.append(" (unsupported record type)"); + break; + } + + return S; +} + +std::string internal::BufferRecordDeallocation::toString() const { + std::string S{"Deallocated "}; + S.append(makeHexString((uint64_t)Buffer)); + return S; +} diff --git a/openmp/tools/omptest/src/InternalEventOperators.cpp b/openmp/tools/omptest/src/InternalEventOperators.cpp new file mode 100644 index 0000000000000..2fbea52a8639e --- /dev/null +++ b/openmp/tools/omptest/src/InternalEventOperators.cpp @@ -0,0 +1,317 @@ +//===- InternalEventOperators.cpp - Operator implementations ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines the internal event operators, like comparators. 
+/// +//===----------------------------------------------------------------------===// + +#include "InternalEvent.h" + +namespace omptest { + +namespace internal { + +bool operator==(const ParallelBegin &Expected, const ParallelBegin &Observed) { + return Expected.NumThreads == Observed.NumThreads; +} + +bool operator==(const Work &Expected, const Work &Observed) { + bool isSameWorkType = (Expected.WorkType == Observed.WorkType); + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + bool isSameParallelData = + (Expected.ParallelData == std::numeric_limits::min()) + ? true + : (Expected.ParallelData == Observed.ParallelData); + bool isSameTaskData = + (Expected.TaskData == std::numeric_limits::min()) + ? true + : (Expected.TaskData == Observed.TaskData); + bool isSameCount = (Expected.Count == std::numeric_limits::min()) + ? true + : (Expected.Count == Observed.Count); + return isSameWorkType && isSameEndpoint && isSameParallelData && + isSameTaskData && isSameCount; +} + +bool operator==(const ImplicitTask &Expected, const ImplicitTask &Observed) { + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + bool isSameActualParallelism = + (Expected.ActualParallelism == std::numeric_limits::min()) + ? true + : (Expected.ActualParallelism == Observed.ActualParallelism); + bool isSameIndex = + (Expected.Index == std::numeric_limits::min()) + ? true + : (Expected.Index == Observed.Index); + return isSameEndpoint && isSameActualParallelism && isSameIndex; +} + +bool operator==(const SyncRegion &Expected, const SyncRegion &Observed) { + bool isSameKind = (Expected.Kind == Observed.Kind); + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + bool isSameParallelData = + (Expected.ParallelData == std::numeric_limits::min()) + ? true + : (Expected.ParallelData == Observed.ParallelData); + bool isSameTaskData = + (Expected.TaskData == std::numeric_limits::min()) + ? 
true + : (Expected.TaskData == Observed.TaskData); + return isSameKind && isSameEndpoint && isSameParallelData && isSameTaskData; +} + +bool operator==(const Target &Expected, const Target &Observed) { + bool isSameKind = (Expected.Kind == Observed.Kind); + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + bool isSameDeviceNum = (Expected.DeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DeviceNum == Observed.DeviceNum); + return isSameKind && isSameEndpoint && isSameDeviceNum; +} + +bool operator==(const TargetEmi &Expected, const TargetEmi &Observed) { + bool isSameKind = (Expected.Kind == Observed.Kind); + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + bool isSameDeviceNum = (Expected.DeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DeviceNum == Observed.DeviceNum); + return isSameKind && isSameEndpoint && isSameDeviceNum; +} + +bool operator==(const TargetDataOp &Expected, const TargetDataOp &Observed) { + bool isSameOpType = (Expected.OpType == Observed.OpType); + bool isSameSize = (Expected.Bytes == std::numeric_limits::min()) + ? true + : (Expected.Bytes == Observed.Bytes); + bool isSameSrcAddr = (Expected.SrcAddr == std::numeric_limits::min()) + ? true + : (Expected.SrcAddr == Observed.SrcAddr); + bool isSameDstAddr = (Expected.DstAddr == std::numeric_limits::min()) + ? true + : (Expected.DstAddr == Observed.DstAddr); + bool isSameSrcDeviceNum = + (Expected.SrcDeviceNum == std::numeric_limits::min()) + ? true + : (Expected.SrcDeviceNum == Observed.SrcDeviceNum); + bool isSameDstDeviceNum = + (Expected.DstDeviceNum == std::numeric_limits::min()) + ? 
true + : (Expected.DstDeviceNum == Observed.DstDeviceNum); + return isSameOpType && isSameSize && isSameSrcAddr && isSameDstAddr && + isSameSrcDeviceNum && isSameDstDeviceNum; +} + +bool operator==(const TargetDataOpEmi &Expected, + const TargetDataOpEmi &Observed) { + bool isSameOpType = (Expected.OpType == Observed.OpType); + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + bool isSameSize = (Expected.Bytes == std::numeric_limits::min()) + ? true + : (Expected.Bytes == Observed.Bytes); + bool isSameSrcAddr = (Expected.SrcAddr == std::numeric_limits::min()) + ? true + : (Expected.SrcAddr == Observed.SrcAddr); + bool isSameDstAddr = (Expected.DstAddr == std::numeric_limits::min()) + ? true + : (Expected.DstAddr == Observed.DstAddr); + bool isSameSrcDeviceNum = + (Expected.SrcDeviceNum == std::numeric_limits::min()) + ? true + : (Expected.SrcDeviceNum == Observed.SrcDeviceNum); + bool isSameDstDeviceNum = + (Expected.DstDeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DstDeviceNum == Observed.DstDeviceNum); + return isSameOpType && isSameEndpoint && isSameSize && isSameSrcAddr && + isSameDstAddr && isSameSrcDeviceNum && isSameDstDeviceNum; +} + +bool operator==(const TargetSubmit &Expected, const TargetSubmit &Observed) { + bool isSameReqNumTeams = + (Expected.RequestedNumTeams == Observed.RequestedNumTeams); + return isSameReqNumTeams; +} + +bool operator==(const TargetSubmitEmi &Expected, + const TargetSubmitEmi &Observed) { + bool isSameReqNumTeams = + (Expected.RequestedNumTeams == Observed.RequestedNumTeams); + bool isSameEndpoint = (Expected.Endpoint == Observed.Endpoint); + return isSameReqNumTeams && isSameEndpoint; +} + +bool operator==(const DeviceInitialize &Expected, + const DeviceInitialize &Observed) { + bool isSameDeviceNum = (Expected.DeviceNum == Observed.DeviceNum); + bool isSameType = (Expected.Type == std::numeric_limits::min()) + ? 
true + : ((Expected.Type == Observed.Type) || + (strcmp(Expected.Type, Observed.Type) == 0)); + bool isSameDevice = + (Expected.Device == std::numeric_limits::min()) + ? true + : (Expected.Device == Observed.Device); + return isSameDeviceNum && isSameType && isSameDevice; +} + +bool operator==(const DeviceFinalize &Expected, + const DeviceFinalize &Observed) { + bool isSameDeviceNum = (Expected.DeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DeviceNum == Observed.DeviceNum); + return isSameDeviceNum; +} + +bool operator==(const DeviceLoad &Expected, const DeviceLoad &Observed) { + bool isSameDeviceNum = (Expected.DeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DeviceNum == Observed.DeviceNum); + bool isSameSize = (Expected.Bytes == std::numeric_limits::min()) + ? true + : (Expected.Bytes == Observed.Bytes); + return isSameDeviceNum && isSameSize; +} + +bool operator==(const BufferRequest &Expected, const BufferRequest &Observed) { + bool isSameDeviceNum = (Expected.DeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DeviceNum == Observed.DeviceNum); + bool isSameSize = (Expected.Bytes == std::numeric_limits::min()) + ? true + : (Expected.Bytes == Observed.Bytes); + return isSameDeviceNum && isSameSize; +} + +bool operator==(const BufferComplete &Expected, + const BufferComplete &Observed) { + bool isSameDeviceNum = (Expected.DeviceNum == std::numeric_limits::min()) + ? true + : (Expected.DeviceNum == Observed.DeviceNum); + bool isSameSize = (Expected.Bytes == std::numeric_limits::min()) + ? true + : (Expected.Bytes == Observed.Bytes); + return isSameDeviceNum && isSameSize; +} + +bool operator==(const BufferRecord &Expected, const BufferRecord &Observed) { + bool isSameType = (Expected.Record.type == Observed.Record.type); + bool isSameTargetId = + (Expected.Record.target_id == std::numeric_limits::min()) + ? 
true + : (Expected.Record.target_id == Observed.Record.target_id); + if (!(isSameType && isSameTargetId)) + return false; + bool isEqual = true; + ompt_device_time_t ObservedDurationNs = + Observed.Record.record.target_data_op.end_time - Observed.Record.time; + switch (Expected.Record.type) { + case ompt_callback_target: + isEqual &= (Expected.Record.record.target.kind == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target.kind == + Observed.Record.record.target.kind); + isEqual &= (Expected.Record.record.target.endpoint == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target.endpoint == + Observed.Record.record.target.endpoint); + isEqual &= (Expected.Record.record.target.device_num == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target.device_num == + Observed.Record.record.target.device_num); + break; + case ompt_callback_target_data_op: + isEqual &= (Expected.Record.record.target_data_op.optype == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.optype == + Observed.Record.record.target_data_op.optype); + isEqual &= (Expected.Record.record.target_data_op.bytes == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.bytes == + Observed.Record.record.target_data_op.bytes); + isEqual &= (Expected.Record.record.target_data_op.src_addr == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.src_addr == + Observed.Record.record.target_data_op.src_addr); + isEqual &= (Expected.Record.record.target_data_op.dest_addr == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.dest_addr == + Observed.Record.record.target_data_op.dest_addr); + isEqual &= (Expected.Record.record.target_data_op.src_device_num == + std::numeric_limits::min()) + ? 
true + : (Expected.Record.record.target_data_op.src_device_num == + Observed.Record.record.target_data_op.src_device_num); + isEqual &= (Expected.Record.record.target_data_op.dest_device_num == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.dest_device_num == + Observed.Record.record.target_data_op.dest_device_num); + isEqual &= (Expected.Record.record.target_data_op.host_op_id == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.host_op_id == + Observed.Record.record.target_data_op.host_op_id); + isEqual &= (Expected.Record.record.target_data_op.codeptr_ra == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_data_op.codeptr_ra == + Observed.Record.record.target_data_op.codeptr_ra); + if (Expected.Record.record.target_data_op.end_time != + std::numeric_limits::min()) { + isEqual &= + ObservedDurationNs <= Expected.Record.record.target_data_op.end_time; + } + isEqual &= ObservedDurationNs >= Expected.Record.time; + break; + case ompt_callback_target_submit: + ObservedDurationNs = + Observed.Record.record.target_kernel.end_time - Observed.Record.time; + isEqual &= + (Expected.Record.record.target_kernel.requested_num_teams == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_kernel.requested_num_teams == + Observed.Record.record.target_kernel.requested_num_teams); + isEqual &= (Expected.Record.record.target_kernel.granted_num_teams == + std::numeric_limits::min()) + ? true + : (Expected.Record.record.target_kernel.granted_num_teams == + Observed.Record.record.target_kernel.granted_num_teams); + isEqual &= (Expected.Record.record.target_kernel.host_op_id == + std::numeric_limits::min()) + ? 
true + : (Expected.Record.record.target_kernel.host_op_id == + Observed.Record.record.target_kernel.host_op_id); + if (Expected.Record.record.target_kernel.end_time != + std::numeric_limits::min()) { + isEqual &= + ObservedDurationNs <= Expected.Record.record.target_kernel.end_time; + } + isEqual &= ObservedDurationNs >= Expected.Record.time; + break; + default: + (static_cast(false && "Encountered invalid record type") + ? void(0) + : __assert_fail("false && \"Encountered invalid record type\"", + "../src/InternalEventOperators.cpp", 285, + __extension__ __PRETTY_FUNCTION__)); + } + return isEqual; +} + +} // namespace internal + +} // namespace omptest diff --git a/openmp/tools/omptest/src/Logging.cpp b/openmp/tools/omptest/src/Logging.cpp new file mode 100644 index 0000000000000..28329c74d188d --- /dev/null +++ b/openmp/tools/omptest/src/Logging.cpp @@ -0,0 +1,177 @@ +//===- Logging.cpp - General logging class implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements ompTest-tailored logging. 
+/// +//===----------------------------------------------------------------------===// + +#include "Logging.h" + +using namespace omptest; +using namespace logging; + +Logger::Logger(Level LogLevel, std::ostream &OutStream, bool FormatOutput) + : LoggingLevel(LogLevel), OutStream(OutStream), FormatOutput(FormatOutput) { + // Flush any buffered output + OutStream << std::flush; +} + +Logger::~Logger() { + // Flush any buffered output + OutStream << std::flush; +} + +std::map> AggregatedFormatOptions{ + {Level::DIAGNOSTIC, {FormatOption::COLOR_LightBlue}}, + {Level::INFO, {FormatOption::COLOR_LightGray}}, + {Level::WARNING, {FormatOption::COLOR_LightYellow}}, + {Level::ERROR, {FormatOption::COLOR_Red}}, + {Level::CRITICAL, {FormatOption::COLOR_LightRed}}, + {Level::Default, {FormatOption::NONE}}, + {Level::ExpectedEvent, {FormatOption::BOLD, FormatOption::COLOR_Cyan}}, + {Level::ObservedEvent, {FormatOption::COLOR_Cyan}}, + {Level::OffendingEvent, {FormatOption::COLOR_Yellow}}}; + +const char *logging::to_string(Level LogLevel) { + switch (LogLevel) { + case Level::DIAGNOSTIC: + return "DIAGNOSTIC"; + case Level::INFO: + return "INFO"; + case Level::WARNING: + return "WARNING"; + case Level::ERROR: + return "ERROR"; + case Level::CRITICAL: + return "CRITICAL"; + default: + assert(false && "Requested string representation for unknown LogLevel"); + return "UNKNOWN"; + } +} + +std::string logging::getFormatSequence(Level LogLevel) { + auto Options = AggregatedFormatOptions[LogLevel]; + std::stringstream SS{"\033["}; + SS << "\033["; + if (!Options.empty()) { + for (auto &Option : AggregatedFormatOptions[LogLevel]) + SS << int(Option) << ';'; + SS.seekp(-1, SS.cur); + SS << 'm'; + } else { + // Fallback to NONE / reset formatting + SS << "0m"; + } + return SS.str(); +} + +std::string logging::format(const std::string &Message, FormatOption Option) { + std::stringstream SS{"\033["}; + SS << "\033["; + SS << int(Option) << 'm' << Message << "\033[0m"; + return SS.str(); +} 
+ +std::string logging::format(const std::string &Message, + std::set Options) { + std::stringstream SS{"\033["}; + SS << "\033["; + for (auto &Option : Options) + SS << int(Option) << ';'; + SS.seekp(-1, SS.cur); + SS << 'm' << Message << "\033[0m"; + return SS.str(); +} + +void Logger::log(Level LogLevel, const std::string &Message) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << Message << getFormatSequence() << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message << std::endl; + } +} + +void Logger::eventMismatch(const omptest::OmptAssertEvent &OffendingEvent, + const std::string &Message, Level LogLevel) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << getFormatSequence() + << format(Message, AggregatedFormatOptions[LogLevel]) + << "\n\tOffending event name='" + << format(OffendingEvent.getEventName(), + AggregatedFormatOptions[Level::OffendingEvent]) + << "'\n\tOffending='" + << format(OffendingEvent.toString(), + AggregatedFormatOptions[Level::OffendingEvent]) + << '\'' << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message + << "\n\tOffending event name='" << OffendingEvent.getEventName() + << "'\n\tOffending='" << OffendingEvent.toString() << '\'' + << std::endl; + } +} + +void Logger::eventMismatch(const omptest::OmptAssertEvent &ExpectedEvent, + const omptest::OmptAssertEvent &ObservedEvent, + const std::string &Message, Level LogLevel) const { + // Serialize logging + std::lock_guard Lock(LogMutex); + if (LoggingLevel > LogLevel) + return; + + if (FormatOutput) { + OutStream << getFormatSequence(LogLevel) << '[' << to_string(LogLevel) + << "] " << Message << 
getFormatSequence() + << "\n\tExpected event name='" + << format(ExpectedEvent.getEventName(), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "' observe='" + << format(to_string(ExpectedEvent.getEventExpectedState()), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "'\n\tObserved event name='" + << format(ObservedEvent.getEventName(), + AggregatedFormatOptions[Level::ObservedEvent]) + << "'\n\tExpected='" + << format(ExpectedEvent.toString(), + AggregatedFormatOptions[Level::ExpectedEvent]) + << "'\n\tObserved='" + << format(ObservedEvent.toString(), + AggregatedFormatOptions[Level::ObservedEvent]) + << '\'' << std::endl; + } else { + OutStream << '[' << to_string(LogLevel) << "] " << Message + << "\n\tExpected event name='" << ExpectedEvent.getEventName() + << "' observe='" + << to_string(ExpectedEvent.getEventExpectedState()) + << "'\n\tObserved event name='" << ObservedEvent.getEventName() + << "'\n\tExpected='" << ExpectedEvent.toString() + << "'\n\tObserved='" << ObservedEvent.toString() << '\'' + << std::endl; + } +} + +void Logger::setFormatOutput(bool Enabled) { FormatOutput = Enabled; } + +Level Logger::getLoggingLevel() const { return LoggingLevel; } + +void Logger::setLoggingLevel(Level LogLevel) { LoggingLevel = LogLevel; } diff --git a/openmp/tools/omptest/src/OmptAssertEvent.cpp b/openmp/tools/omptest/src/OmptAssertEvent.cpp new file mode 100644 index 0000000000000..b03f267a8c397 --- /dev/null +++ b/openmp/tools/omptest/src/OmptAssertEvent.cpp @@ -0,0 +1,587 @@ +//===- OmptAssertEvent.cpp - Assertion event implementations ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements assertion event CTORs, for generally all observable events. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptAssertEvent.h" +#include + +using namespace omptest; + +const char *omptest::to_string(ObserveState State) { + switch (State) { + case ObserveState::generated: + return "generated"; + case ObserveState::always: + return "always"; + case ObserveState::never: + return "never"; + default: + assert(false && "Requested string representation for unknown ObserveState"); + return "UNKNOWN"; + } +} + +OmptAssertEvent::OmptAssertEvent(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + internal::InternalEvent *IE) + : Name(Name), Group(Group), ExpectedState(Expected), TheEvent(IE) {} + +OmptAssertEvent OmptAssertEvent::AssertionSyncPoint( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, const std::string &SyncPointName) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::AssertionSyncPoint(SyncPointName)); +} + +OmptAssertEvent +OmptAssertEvent::AssertionSuspend(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::AssertionSuspend()); +} + +OmptAssertEvent OmptAssertEvent::ThreadBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_thread_t ThreadType) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ThreadBegin(ThreadType)); +} + +OmptAssertEvent OmptAssertEvent::ThreadEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::ThreadEnd()); +} + +OmptAssertEvent 
OmptAssertEvent::ParallelBegin(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int NumThreads) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ParallelBegin(NumThreads)); +} + +OmptAssertEvent OmptAssertEvent::ParallelEnd(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ParallelEnd(ParallelData, + EncounteringTaskData, Flags, + CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Work(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::Work(WorkType, Endpoint, ParallelData, + TaskData, Count, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Dispatch(const std::string &Name, const std::string &Group, + const ObserveState &Expected, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + ompt_dispatch_t Kind, ompt_data_t Instance) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::Dispatch(ParallelData, TaskData, Kind, Instance)); +} + +OmptAssertEvent OmptAssertEvent::TaskCreate( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *NewTaskData, + int Flags, int HasDependences, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent( + EName, EGroup, Expected, + new internal::TaskCreate(EncounteringTaskData, EncounteringTaskFrame, + NewTaskData, Flags, HasDependences, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TaskSchedule(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::TaskSchedule()); +} + +OmptAssertEvent OmptAssertEvent::ImplicitTask( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, ompt_data_t *TaskData, + unsigned int ActualParallelism, unsigned int Index, int Flags) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::ImplicitTask(Endpoint, ParallelData, + TaskData, ActualParallelism, + Index, Flags)); +} + +OmptAssertEvent OmptAssertEvent::SyncRegion( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, ompt_data_t *ParallelData, + ompt_data_t *TaskData, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::SyncRegion(Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::Target(const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_id_t TargetId, + const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::Target(Kind, Endpoint, DeviceNum, + TaskData, TargetId, CodeptrRA)); +} + +OmptAssertEvent +OmptAssertEvent::TargetEmi(const std::string &Name, const std::string &Group, + const 
ObserveState &Expected, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, + ompt_data_t *TaskData, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetEmi(Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, + TargetData, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOp( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_id_t TargetId, ompt_id_t HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetDataOp( + TargetId, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOp( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, size_t Bytes, + void *SrcAddr, void *DstAddr, int SrcDeviceNum, int DstDeviceNum, + ompt_id_t TargetId, ompt_id_t HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetDataOp( + TargetId, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOpEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetTaskData, ompt_data_t *TargetData, ompt_id_t *HostOpId, + ompt_target_data_op_t OpType, void *SrcAddr, int SrcDeviceNum, + void *DstAddr, int DstDeviceNum, size_t Bytes, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetDataOpEmi(Endpoint, TargetTaskData, TargetData, + HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetDataOpEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_target_data_op_t OpType, + ompt_scope_endpoint_t Endpoint, size_t Bytes, void *SrcAddr, void *DstAddr, + int SrcDeviceNum, int DstDeviceNum, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetDataOpEmi(Endpoint, TargetTaskData, TargetData, + HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmit(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_id_t TargetId, + ompt_id_t HostOpId, + unsigned int RequestedNumTeams) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetSubmit(TargetId, HostOpId, RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmit(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + unsigned int RequestedNumTeams, + ompt_id_t TargetId, + ompt_id_t HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::TargetSubmit(TargetId, HostOpId, RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmitEmi( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, ompt_id_t *HostOpId, + unsigned int RequestedNumTeams) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return 
OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetSubmitEmi(Endpoint, TargetData, + HostOpId, + RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::TargetSubmitEmi(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + unsigned int RequestedNumTeams, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *TargetData, + ompt_id_t *HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::TargetSubmitEmi(Endpoint, TargetData, + HostOpId, + RequestedNumTeams)); +} + +OmptAssertEvent OmptAssertEvent::ControlTool(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::ControlTool()); +} + +OmptAssertEvent OmptAssertEvent::DeviceInitialize( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, const char *Type, + ompt_device_t *Device, ompt_function_lookup_t LookupFn, + const char *DocumentationStr) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::DeviceInitialize(DeviceNum, Type, Device, + LookupFn, + DocumentationStr)); +} + +OmptAssertEvent OmptAssertEvent::DeviceFinalize(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::DeviceFinalize(DeviceNum)); +} + +OmptAssertEvent +OmptAssertEvent::DeviceLoad(const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, + const char *Filename, int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, void *HostAddr, + void *DeviceAddr, uint64_t ModuleId) { + auto EName = getName(Name); + auto EGroup = 
getGroup(Group); + return OmptAssertEvent( + EName, EGroup, Expected, + new internal::DeviceLoad(DeviceNum, Filename, OffsetInFile, VmaInFile, + Bytes, HostAddr, DeviceAddr, ModuleId)); +} + +OmptAssertEvent OmptAssertEvent::DeviceUnload(const std::string &Name, + const std::string &Group, + const ObserveState &Expected) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, new internal::DeviceUnload()); +} + +OmptAssertEvent OmptAssertEvent::BufferRequest(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + int DeviceNum, + ompt_buffer_t **Buffer, + size_t *Bytes) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRequest(DeviceNum, Buffer, Bytes)); +} + +OmptAssertEvent OmptAssertEvent::BufferComplete( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, int DeviceNum, ompt_buffer_t *Buffer, + size_t Bytes, ompt_buffer_cursor_t Begin, int BufferOwned) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferComplete(DeviceNum, Buffer, Bytes, + Begin, BufferOwned)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord(const std::string &Name, + const std::string &Group, + const ObserveState &Expected, + ompt_record_ompt_t *Record) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(Record)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, int DeviceNum, ompt_id_t TaskId, + ompt_id_t TargetId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + if (Type != ompt_callback_target) + 
assert(false && "CTOR only suited for type: 'ompt_callback_target'"); + + ompt_record_target_t Subrecord{Kind, Endpoint, DeviceNum, + TaskId, TargetId, CodeptrRA}; + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + RecordPtr->time = expectedDefault(ompt_device_time_t); + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + std::pair Timeframe, void *SrcAddr, + void *DstAddr, int SrcDeviceNum, int DstDeviceNum, ompt_id_t TargetId, + ompt_id_t HostOpId, const void *CodeptrRA) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + if (Type != ompt_callback_target_data_op) + assert(false && + "CTOR only suited for type: 'ompt_callback_target_data_op'"); + + ompt_record_target_data_op_t Subrecord{ + HostOpId, OpType, SrcAddr, SrcDeviceNum, DstAddr, + DstDeviceNum, Bytes, Timeframe.second, CodeptrRA}; + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + RecordPtr->time = Timeframe.first; + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target_data_op = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_target_data_op_t OpType, size_t Bytes, + ompt_device_time_t 
MinimumTimeDelta, void *SrcAddr, void *DstAddr, + int SrcDeviceNum, int DstDeviceNum, ompt_id_t TargetId, ompt_id_t HostOpId, + const void *CodeptrRA) { + return BufferRecord(Name, Group, Expected, Type, OpType, Bytes, + {MinimumTimeDelta, expectedDefault(ompt_device_time_t)}, + SrcAddr, DstAddr, SrcDeviceNum, DstDeviceNum, TargetId, + HostOpId, CodeptrRA); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + std::pair Timeframe, + unsigned int RequestedNumTeams, unsigned int GrantedNumTeams, + ompt_id_t TargetId, ompt_id_t HostOpId) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + + bool isDefault = (Timeframe.first == expectedDefault(ompt_device_time_t)); + isDefault &= (Timeframe.second == expectedDefault(ompt_device_time_t)); + isDefault &= (RequestedNumTeams == expectedDefault(unsigned int)); + isDefault &= (GrantedNumTeams == expectedDefault(unsigned int)); + isDefault &= (TargetId == expectedDefault(ompt_id_t)); + isDefault &= (HostOpId == expectedDefault(ompt_id_t)); + + ompt_record_ompt_t *RecordPtr = + (ompt_record_ompt_t *)malloc(sizeof(ompt_record_ompt_t)); + memset(RecordPtr, 0, sizeof(ompt_record_ompt_t)); + RecordPtr->type = Type; + + // This handles the simplest occurrence of a device tracing record + // We can only check for Type -- since all other properties are set to default + if (isDefault) { + RecordPtr->time = expectedDefault(ompt_device_time_t); + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = expectedDefault(ompt_id_t); + if (Type == ompt_callback_target) { + ompt_record_target_t Subrecord{expectedDefault(ompt_target_t), + expectedDefault(ompt_scope_endpoint_t), + expectedDefault(int), + expectedDefault(ompt_id_t), + expectedDefault(ompt_id_t), + expectedDefault(void *)}; + RecordPtr->record.target = Subrecord; + } + + if (Type == ompt_callback_target_data_op) { + 
ompt_record_target_data_op_t Subrecord{ + expectedDefault(ompt_id_t), expectedDefault(ompt_target_data_op_t), + expectedDefault(void *), expectedDefault(int), + expectedDefault(void *), expectedDefault(int), + expectedDefault(size_t), expectedDefault(ompt_device_time_t), + expectedDefault(void *)}; + RecordPtr->record.target_data_op = Subrecord; + } + + if (Type == ompt_callback_target_submit) { + ompt_record_target_kernel_t Subrecord{ + expectedDefault(ompt_id_t), expectedDefault(unsigned int), + expectedDefault(unsigned int), expectedDefault(ompt_device_time_t)}; + RecordPtr->record.target_kernel = Subrecord; + } + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); + } + + if (Type != ompt_callback_target_submit) + assert(false && "CTOR only suited for type: 'ompt_callback_target_submit'"); + + ompt_record_target_kernel_t Subrecord{HostOpId, RequestedNumTeams, + GrantedNumTeams, Timeframe.second}; + + RecordPtr->time = Timeframe.first; + RecordPtr->thread_id = expectedDefault(ompt_id_t); + RecordPtr->target_id = TargetId; + RecordPtr->record.target_kernel = Subrecord; + + return OmptAssertEvent(EName, EGroup, Expected, + new internal::BufferRecord(RecordPtr)); +} + +OmptAssertEvent OmptAssertEvent::BufferRecord( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_callbacks_t Type, + ompt_device_time_t MinimumTimeDelta, unsigned int RequestedNumTeams, + unsigned int GrantedNumTeams, ompt_id_t TargetId, ompt_id_t HostOpId) { + return BufferRecord(Name, Group, Expected, Type, + {MinimumTimeDelta, expectedDefault(ompt_device_time_t)}, + RequestedNumTeams, GrantedNumTeams, TargetId, HostOpId); +} + +OmptAssertEvent OmptAssertEvent::BufferRecordDeallocation( + const std::string &Name, const std::string &Group, + const ObserveState &Expected, ompt_buffer_t *Buffer) { + auto EName = getName(Name); + auto EGroup = getGroup(Group); + return OmptAssertEvent(EName, EGroup, Expected, + new 
internal::BufferRecordDeallocation(Buffer)); +} + +std::string OmptAssertEvent::getEventName() const { return Name; } + +std::string OmptAssertEvent::getEventGroup() const { return Group; } + +ObserveState OmptAssertEvent::getEventExpectedState() const { + return ExpectedState; +} + +internal::EventTy OmptAssertEvent::getEventType() const { + return TheEvent->getType(); +} + +internal::InternalEvent *OmptAssertEvent::getEvent() const { + return TheEvent.get(); +} + +std::string OmptAssertEvent::toString(bool PrefixEventName) const { + std::string S; + if (PrefixEventName) + S.append(getEventName()).append(": "); + S.append((TheEvent == nullptr) ? "OmptAssertEvent" : TheEvent->toString()); + return S; +} + +bool omptest::operator==(const OmptAssertEvent &A, const OmptAssertEvent &B) { + assert(A.TheEvent.get() != nullptr && "A is valid"); + assert(B.TheEvent.get() != nullptr && "B is valid"); + + return A.TheEvent->getType() == B.TheEvent->getType() && + A.TheEvent->equals(B.TheEvent.get()); +} diff --git a/openmp/tools/omptest/src/OmptAsserter.cpp b/openmp/tools/omptest/src/OmptAsserter.cpp new file mode 100644 index 0000000000000..df496a5fad8d4 --- /dev/null +++ b/openmp/tools/omptest/src/OmptAsserter.cpp @@ -0,0 +1,477 @@ +//===- OmptAsserter.cpp - Asserter-related implementations ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implements all asserter-related class methods, like: notifications, handling +/// of groups or determination of the testcase state. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "OmptAsserter.h"
+#include "Logging.h"
+
+#include <algorithm>
+
+using namespace omptest;
+using namespace internal;
+
+// Initialize static members
+std::mutex OmptAsserter::StaticMemberAccessMutex;
+std::weak_ptr<OmptEventGroupInterface>
+    OmptAsserter::EventGroupInterfaceInstance;
+std::weak_ptr<logging::Logger> OmptAsserter::LoggingInstance;
+
+/// Attach this asserter to the event-group interface and the logger that are
+/// published through the static weak_ptrs, creating either one if no live
+/// instance exists.
+OmptAsserter::OmptAsserter() {
+  // Protect static members access
+  std::lock_guard Lock(StaticMemberAccessMutex);
+
+  // Upgrade OmptEventGroupInterface weak_ptr to shared_ptr
+  {
+    EventGroups = EventGroupInterfaceInstance.lock();
+    if (!EventGroups) {
+      // Coordinator doesn't exist or was previously destroyed, create a new
+      // one.
+      EventGroups = std::make_shared<OmptEventGroupInterface>();
+      // Store a weak reference to it
+      EventGroupInterfaceInstance = EventGroups;
+    }
+    // EventGroups is now a valid shared_ptr, either to a new or existing
+    // instance.
+  }
+
+  // Upgrade logging::Logger weak_ptr to shared_ptr
+  {
+    Log = LoggingInstance.lock();
+    if (!Log) {
+      // Logger doesn't exist or was previously destroyed, create a new one.
+      Log = std::make_shared<logging::Logger>();
+      // Store a weak reference to it
+      LoggingInstance = Log;
+    }
+    // Log is now a valid shared_ptr, either to a new or existing instance.
+  }
+}
+
+/// Enable or disable this listener.
+void OmptListener::setActive(bool Enabled) { Active = Enabled; }
+
+/// \returns true if this listener is currently enabled.
+bool OmptListener::isActive() { return Active; }
+
+/// \returns true if \p EvTy is in the set of suppressed event types.
+bool OmptListener::isSuppressedEventType(EventTy EvTy) {
+  return SuppressedEvents.find(EvTy) != SuppressedEvents.end();
+}
+
+/// Remove \p EvTy from the set of suppressed event types.
+void OmptListener::permitEvent(EventTy EvTy) { SuppressedEvents.erase(EvTy); }
+
+/// Add \p EvTy to the set of suppressed event types.
+void OmptListener::suppressEvent(EventTy EvTy) {
+  SuppressedEvents.insert(EvTy);
+}
+
+/// Base-class placeholder: always asserts.
+void OmptAsserter::insert(OmptAssertEvent &&AE) {
+  assert(false && "Base class 'insert' has undefined semantics.");
+}
+
+/// Forward \p AE to notifyImpl unless this asserter is inactive or the event
+/// type is suppressed.
+void OmptAsserter::notify(OmptAssertEvent &&AE) {
+  // Ignore notifications while inactive
+  if (!isActive() || isSuppressedEventType(AE.getEventType()))
+    return;
+
+  this->notifyImpl(std::move(AE));
+}
+
+/// \returns the current assertion state.
+AssertState OmptAsserter::checkState() { return State; }
+
+/// Check the observed event against the event group of the expected event.
+///
+/// Target / TargetEmi events open (scope begin) or deprecate (scope end) the
+/// expected event's group; data-op, submit and buffer-record events must
+/// belong to a matching group. Events expected in the "default" group and
+/// event types without group semantics always pass.
+///
+/// \returns true if the observed event is acceptable for the expected group.
+bool OmptAsserter::verifyEventGroups(const OmptAssertEvent &ExpectedEvent,
+                                     const OmptAssertEvent &ObservedEvent) {
+  assert(ExpectedEvent.getEventType() == ObservedEvent.getEventType() &&
+         "Type mismatch: Expected != Observed event type");
+  assert(EventGroups && "Missing EventGroups interface");
+
+  // Ignore all events within "default" group
+  auto GroupName = ExpectedEvent.getEventGroup();
+
+  if (GroupName == "default")
+    return true;
+
+  // Get a pointer to the observed internal event
+  auto Event = ObservedEvent.getEvent();
+
+  switch (Event->getType()) {
+  case EventTy::Target:
+    if (auto E = static_cast<const internal::Target *>(Event)) {
+      if (E->Endpoint == ompt_scope_begin) {
+        // Add new group since we entered a Target Region
+        EventGroups->addActiveEventGroup(GroupName,
+                                         AssertEventGroup{E->TargetId});
+      } else if (E->Endpoint == ompt_scope_end) {
+        // Deprecate group since we return from a Target Region
+        EventGroups->deprecateActiveEventGroup(GroupName);
+      }
+      return true;
+    }
+    return false;
+  case EventTy::TargetEmi:
+    if (auto E = static_cast<const internal::TargetEmi *>(Event)) {
+      if (E->Endpoint == ompt_scope_begin) {
+        // Add new group since we entered a Target Region
+        EventGroups->addActiveEventGroup(
+            GroupName, AssertEventGroup{E->TargetData->value});
+      } else if (E->Endpoint == ompt_scope_end) {
+        // Deprecate group since we return from a Target Region
+        EventGroups->deprecateActiveEventGroup(GroupName);
+      }
+      return true;
+    }
+    return false;
+  case EventTy::TargetDataOp:
+    if (auto E = static_cast<const internal::TargetDataOp *>(Event))
+      return EventGroups->checkActiveEventGroups(GroupName,
+                                                 AssertEventGroup{E->TargetId});
+
+    return false;
+  case EventTy::TargetDataOpEmi:
+    if (auto E = static_cast<const internal::TargetDataOpEmi *>(Event))
+      return EventGroups->checkActiveEventGroups(
+          GroupName, AssertEventGroup{E->TargetData->value});
+
+    return false;
+  case EventTy::TargetSubmit:
+    if (auto E = static_cast<const internal::TargetSubmit *>(Event))
+      return EventGroups->checkActiveEventGroups(GroupName,
+                                                 AssertEventGroup{E->TargetId});
+
+    return false;
+  case EventTy::TargetSubmitEmi:
+    if (auto E = static_cast<const internal::TargetSubmitEmi *>(Event))
+      return EventGroups->checkActiveEventGroups(
+          GroupName, AssertEventGroup{E->TargetData->value});
+
+    return false;
+  case EventTy::BufferRecord:
+    // BufferRecords are delivered asynchronously: also check deprecated groups.
+    if (auto E = static_cast<const internal::BufferRecord *>(Event))
+      return (EventGroups->checkActiveEventGroups(
+                  GroupName, AssertEventGroup{E->Record.target_id}) ||
+              EventGroups->checkDeprecatedEventGroups(
+                  GroupName, AssertEventGroup{E->Record.target_id}));
+    return false;
+  // Some event types do not need any handling
+  case EventTy::ThreadBegin:
+  case EventTy::ThreadEnd:
+  case EventTy::ParallelBegin:
+  case EventTy::ParallelEnd:
+  case EventTy::Work:
+  case EventTy::Dispatch:
+  case EventTy::TaskCreate:
+  case EventTy::Dependences:
+  case EventTy::TaskDependence:
+  case EventTy::TaskSchedule:
+  case EventTy::ImplicitTask:
+  case EventTy::Masked:
+  case EventTy::SyncRegion:
+  case EventTy::MutexAcquire:
+  case EventTy::Mutex:
+  case EventTy::NestLock:
+  case EventTy::Flush:
+  case EventTy::Cancel:
+  case EventTy::DeviceInitialize:
+  case EventTy::DeviceFinalize:
+  case EventTy::DeviceLoad:
+  case EventTy::DeviceUnload:
+  case EventTy::BufferRequest:
+  case EventTy::BufferComplete:
+  case EventTy::BufferRecordDeallocation:
+    return true;
+  // Some event types must not be encountered
+  case EventTy::None:
+  case EventTy::AssertionSyncPoint:
+  case EventTy::AssertionSuspend:
+  default:
+    assert(false && "Encountered invalid event type");
+  }
+
+  return true;
+}
+
+/// Select the operation mode that decides whether mismatches signal failure.
+void OmptAsserter::setOperationMode(AssertMode Mode) { OperationMode = Mode; }
+
+/// Append \p AE to the sequence of expected events.
+void OmptSequencedAsserter::insert(OmptAssertEvent &&AE) {
+  std::lock_guard Lock(AssertMutex);
+  Events.emplace_back(std::move(AE));
+}
+
+/// Match the observed event \p AE against the next expected event in the
+/// sequence and set the state to 'fail' on a mismatch that is not excused by
+/// suspension or relaxed mode.
+void OmptSequencedAsserter::notifyImpl(OmptAssertEvent &&AE) {
+  std::lock_guard Lock(AssertMutex);
+  // Ignore notifications while inactive, or for suppressed events
+  if (Events.empty() || !isActive() || isSuppressedEventType(AE.getEventType()))
+    return;
+
+  ++NumNotifications;
+
+  // Note: Order of these checks has semantic meaning.
+  // (1) Synchronization points should fail if there are remaining events,
+  // otherwise pass. (2) Regular notification while no further events are
+  // expected: fail. (3) Assertion suspension relies on a next expected event
+  // being available. (4) All other cases are considered 'regular' and match the
+  // next expected against the observed event. (5+6) Depending on the state /
+  // mode we signal failure if no other check has done already, or signaled pass
+  // by early-exit.
+  if (consumeSyncPoint(AE) ||               // Handle observed SyncPoint event
+      checkExcessNotify(AE) ||              // Check for remaining expected
+      consumeSuspend() ||                   // Handle requested suspend
+      consumeRegularEvent(AE) ||            // Handle regular event
+      AssertionSuspended ||                 // Ignore fail, if suspended
+      OperationMode == AssertMode::relaxed) // Ignore fail, if relaxed op-mode
+    return;
+
+  Log->eventMismatch(Events[NextEvent], AE,
+                     "[OmptSequencedAsserter] The events are not equal");
+  State = AssertState::fail;
+}
+
+/// Handle an observed AssertionSyncPoint: fail if expected events remain.
+/// \returns true if \p AE was a SyncPoint (processing is done), else false.
+bool OmptSequencedAsserter::consumeSyncPoint(
+    const omptest::OmptAssertEvent &AE) {
+  if (AE.getEventType() == EventTy::AssertionSyncPoint) {
+    auto NumRemainingEvents = getRemainingEventCount();
+    // Upon encountering a SyncPoint, all events should have been processed
+    if (NumRemainingEvents == 0)
+      return true;
+
+    Log->eventMismatch(
+        AE,
+        "[OmptSequencedAsserter] Encountered SyncPoint while still awaiting " +
+            std::to_string(NumRemainingEvents) + " events. Asserted " +
+            std::to_string(NumSuccessfulAsserts) + "/" +
+            std::to_string(Events.size()) + " events successfully.");
+    State = AssertState::fail;
+    return true;
+  }
+
+  // Nothing to process: continue.
+  return false;
+}
+
+/// Detect a notification that arrives after the expected sequence has been
+/// exhausted; this is a failure unless assertion is currently suspended.
+/// \returns true if the sequence is exhausted (processing is done).
+bool OmptSequencedAsserter::checkExcessNotify(
+    const omptest::OmptAssertEvent &AE) {
+  if (NextEvent >= Events.size()) {
+    // If we are not expecting any more events and passively asserting: return
+    if (AssertionSuspended)
+      return true;
+
+    Log->eventMismatch(
+        AE, "[OmptSequencedAsserter] Too many events to check (" +
+                std::to_string(NumNotifications) + "). Asserted " +
+                std::to_string(NumSuccessfulAsserts) + "/" +
+                std::to_string(Events.size()) + " events successfully.");
+    State = AssertState::fail;
+    return true;
+  }
+
+  // Remaining expected events present: continue.
+  return false;
+}
+
+/// Skip over expected AssertionSuspend markers and enter passive assertion.
+/// \returns true if the markers were the last expected events.
+bool OmptSequencedAsserter::consumeSuspend() {
+  // On AssertionSuspend -- enter 'passive' assertion.
+  // Since we may encounter multiple, successive AssertionSuspend events, loop
+  // until we hit the next non-AssertionSuspend event.
+  while (Events[NextEvent].getEventType() == EventTy::AssertionSuspend) {
+    AssertionSuspended = true;
+    // We just hit the very last event: indicate early exit.
+    if (++NextEvent >= Events.size())
+      return true;
+  }
+
+  // Continue with remaining notification logic.
+  return false;
+}
+
+/// Compare \p AE with the next expected event; on a match advance the
+/// sequence and leave passive assertion.
+/// \returns true if the events matched.
+bool OmptSequencedAsserter::consumeRegularEvent(
+    const omptest::OmptAssertEvent &AE) {
+  // If we are actively asserting, increment the event counter.
+  // Otherwise: If passively asserting, we will keep waiting for a match.
+  auto &E = Events[NextEvent];
+  if (E == AE && verifyEventGroups(E, AE)) {
+    if (E.getEventExpectedState() == ObserveState::always) {
+      ++NumSuccessfulAsserts;
+    } else if (E.getEventExpectedState() == ObserveState::never) {
+      Log->eventMismatch(E, AE,
+                         "[OmptSequencedAsserter] Encountered forbidden event");
+      State = AssertState::fail;
+    }
+
+    // Return to active assertion
+    if (AssertionSuspended)
+      AssertionSuspended = false;
+
+    // Match found, increment index and indicate early exit (success).
+    ++NextEvent;
+    return true;
+  }
+
+  // Continue with remaining notification logic.
+  return false;
+}
+
+/// \returns the number of 'always' expected events minus the number of
+/// successful asserts so far.
+size_t OmptSequencedAsserter::getRemainingEventCount() {
+  return std::count_if(Events.begin(), Events.end(),
+                       [](const omptest::OmptAssertEvent &E) {
+                         return E.getEventExpectedState() ==
+                                ObserveState::always;
+                       }) -
+         NumSuccessfulAsserts;
+}
+
+/// Final check: fail if any 'always' expected event at or after the current
+/// position was not observed.
+AssertState OmptSequencedAsserter::checkState() {
+  // This is called after the testcase executed.
+  // Once reached the number of successful notifications should be equal to the
+  // number of expected events. However, there may still be excluded as well as
+  // special asserter events remaining in the sequence.
+  for (size_t i = NextEvent; i < Events.size(); ++i) {
+    auto &E = Events[i];
+    if (E.getEventExpectedState() == ObserveState::always) {
+      State = AssertState::fail;
+      Log->eventMismatch(E, "[OmptSequencedAsserter] Expected event was not "
+                            "encountered (Remaining events: " +
+                                std::to_string(getRemainingEventCount()) + ")");
+      break;
+    }
+  }
+
+  return State;
+}
+
+/// Append \p AE to the collection of expected events.
+void OmptEventAsserter::insert(OmptAssertEvent &&AE) {
+  std::lock_guard Lock(AssertMutex);
+  Events.emplace_back(std::move(AE));
+}
+
+/// Match the observed event \p AE against any expected event, regardless of
+/// order. A matched 'always' event is removed; a matched 'never' event fails.
+/// In strict mode an unmatched observation also fails.
+void OmptEventAsserter::notifyImpl(OmptAssertEvent &&AE) {
+  std::lock_guard Lock(AssertMutex);
+  if (Events.empty() || !isActive() || isSuppressedEventType(AE.getEventType()))
+    return;
+
+  if (NumEvents == 0)
+    NumEvents = Events.size();
+
+  ++NumNotifications;
+
+  if (AE.getEventType() == EventTy::AssertionSyncPoint) {
+    auto NumRemainingEvents = getRemainingEventCount();
+    // Upon encountering a SyncPoint, all events should have been processed
+    if (NumRemainingEvents == 0)
+      return;
+
+    Log->eventMismatch(
+        AE, "[OmptEventAsserter] Encountered SyncPoint while still awaiting " +
+                std::to_string(NumRemainingEvents) + " events. Asserted " +
+                std::to_string(NumSuccessfulAsserts) + " events successfully.");
+    State = AssertState::fail;
+    return;
+  }
+
+  for (size_t i = 0; i < Events.size(); ++i) {
+    auto &E = Events[i];
+    if (E == AE && verifyEventGroups(E, AE)) {
+      if (E.getEventExpectedState() == ObserveState::always) {
+        // E dangles after the erase; it must not be used past this point.
+        Events.erase(Events.begin() + i);
+        ++NumSuccessfulAsserts;
+      } else if (E.getEventExpectedState() == ObserveState::never) {
+        Log->eventMismatch(E, AE,
+                           "[OmptEventAsserter] Encountered forbidden event");
+        State = AssertState::fail;
+      }
+      return;
+    }
+  }
+
+  if (OperationMode == AssertMode::strict) {
+    Log->eventMismatch(AE, "[OmptEventAsserter] Too many events to check (" +
+                               std::to_string(NumNotifications) +
+                               "). Asserted " +
+                               std::to_string(NumSuccessfulAsserts) +
+                               " events successfully. (Remaining events: " +
+                               std::to_string(getRemainingEventCount()) + ")");
+    State = AssertState::fail;
+    return;
+  }
+}
+
+/// \returns the number of still-queued events expected with state 'always'.
+size_t OmptEventAsserter::getRemainingEventCount() {
+  return std::count_if(
+      Events.begin(), Events.end(), [](const omptest::OmptAssertEvent &E) {
+        return E.getEventExpectedState() == ObserveState::always;
+      });
+}
+
+/// Final check: fail if any 'always' expected event is still queued.
+AssertState OmptEventAsserter::checkState() {
+  // This is called after the testcase executed.
+  // Once reached no more expected events should be in the queue
+  for (const auto &E : Events) {
+    // Check if any of the remaining events were expected to be observed
+    if (E.getEventExpectedState() == ObserveState::always) {
+      State = AssertState::fail;
+      Log->eventMismatch(E, "[OmptEventAsserter] Expected event was not "
+                            "encountered (Remaining events: " +
+                                std::to_string(getRemainingEventCount()) + ")");
+      break;
+    }
+  }
+
+  return State;
+}
+
+/// Write the textual form of \p AE to the output stream unless this reporter
+/// is inactive or the event type is suppressed.
+void OmptEventReporter::notify(OmptAssertEvent &&AE) {
+  if (!isActive() || isSuppressedEventType(AE.getEventType()))
+    return;
+
+  // Prepare notification, containing the newline to avoid stream interleaving.
+  auto Notification{AE.toString()};
+  Notification.push_back('\n');
+  OutStream << Notification;
+}
+
+/// Register \p Group as active under \p GroupName.
+/// \returns false if \p GroupName is already active for the same target
+/// region, true otherwise.
+bool OmptEventGroupInterface::addActiveEventGroup(
+    const std::string &GroupName, omptest::AssertEventGroup Group) {
+  std::lock_guard Lock(GroupMutex);
+  auto EventGroup = ActiveEventGroups.find(GroupName);
+  if (EventGroup != ActiveEventGroups.end() &&
+      EventGroup->second.TargetRegion == Group.TargetRegion)
+    return false;
+  ActiveEventGroups.emplace(GroupName, Group);
+  return true;
+}
+
+/// Move the group registered under \p GroupName from the active to the
+/// deprecated groups.
+/// \returns false if \p GroupName is not currently active, true otherwise.
+bool OmptEventGroupInterface::deprecateActiveEventGroup(
+    const std::string &GroupName) {
+  std::lock_guard Lock(GroupMutex);
+  auto EventGroup = ActiveEventGroups.find(GroupName);
+  // Nothing to deprecate unless the group is currently active. This covers a
+  // group that was already deprecated (e.g. via another asserter sharing this
+  // interface) as well as a group that was never registered; the latter must
+  // not reach the dereference of the end() iterator below.
+  if (EventGroup == ActiveEventGroups.end())
+    return false;
+  DeprecatedEventGroups.emplace(GroupName, EventGroup->second);
+  ActiveEventGroups.erase(GroupName);
+  return true;
+}
+
+/// \returns true if \p GroupName is active for the target region of \p Group.
+bool OmptEventGroupInterface::checkActiveEventGroups(
+    const std::string &GroupName, omptest::AssertEventGroup Group) {
+  std::lock_guard Lock(GroupMutex);
+  auto EventGroup = ActiveEventGroups.find(GroupName);
+  return (EventGroup != ActiveEventGroups.end() &&
+          EventGroup->second.TargetRegion == Group.TargetRegion);
+}
+
+/// \returns true if \p GroupName is deprecated for the target region of
+/// \p Group.
+bool OmptEventGroupInterface::checkDeprecatedEventGroups(
+    const std::string &GroupName, omptest::AssertEventGroup Group) {
+  std::lock_guard Lock(GroupMutex);
+  auto EventGroup = DeprecatedEventGroups.find(GroupName);
+  return (EventGroup != DeprecatedEventGroups.end() &&
+          EventGroup->second.TargetRegion == Group.TargetRegion);
+}
diff --git a/openmp/tools/omptest/src/OmptCallbackHandler.cpp b/openmp/tools/omptest/src/OmptCallbackHandler.cpp
new file mode 100644
index 0000000000000..0794a1c27a902
--- /dev/null
+++ b/openmp/tools/omptest/src/OmptCallbackHandler.cpp
@@ -0,0 +1,445 @@
+//===- OmptCallbackHandler.cpp - OMPT Callback handling impl. 
---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the OMPT callback handling implementations. +/// +//===----------------------------------------------------------------------===// + +#include "OmptCallbackHandler.h" + +using namespace omptest; + +OmptCallbackHandler *Handler = nullptr; + +OmptCallbackHandler &OmptCallbackHandler::get() { + if (Handler == nullptr) + Handler = new OmptCallbackHandler(); + + return *Handler; +} + +void OmptCallbackHandler::subscribe(OmptListener *Listener) { + Subscribers.push_back(Listener); +} + +void OmptCallbackHandler::clearSubscribers() { + replay(); + + Subscribers.clear(); +} + +void OmptCallbackHandler::replay() { + if (!RecordAndReplay) + return; + + for (auto &E : RecordedEvents) + for (const auto &S : Subscribers) + S->notify(std::move(E)); +} + +void OmptCallbackHandler::handleThreadBegin(ompt_thread_t ThreadType, + ompt_data_t *ThreadData) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ThreadBegin( + "Thread Begin", "", ObserveState::generated, ThreadType)); + return; + } + + // Initial thread event likely to preceed assertion registration, so skip + if (ThreadType == ompt_thread_initial) + return; + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ThreadBegin( + "Thread Begin", "", ObserveState::generated, ThreadType)); +} + +void OmptCallbackHandler::handleThreadEnd(ompt_data_t *ThreadData) { + if (RecordAndReplay) { + recordEvent( + OmptAssertEvent::ThreadEnd("Thread End", "", ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify( + OmptAssertEvent::ThreadEnd("Thread End", "", ObserveState::generated)); +} + +void OmptCallbackHandler::handleTaskCreate( + 
ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *NewTaskData, + int Flags, int HasDependences, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TaskCreate( + "Task Create", "", ObserveState::generated, EncounteringTaskData, + EncounteringTaskFrame, NewTaskData, Flags, HasDependences, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TaskCreate( + "Task Create", "", ObserveState::generated, EncounteringTaskData, + EncounteringTaskFrame, NewTaskData, Flags, HasDependences, CodeptrRA)); +} + +void OmptCallbackHandler::handleTaskSchedule(ompt_data_t *PriorTaskData, + ompt_task_status_t PriorTaskStatus, + ompt_data_t *NextTaskData) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TaskSchedule("Task Schedule", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TaskSchedule("Task Schedule", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleImplicitTask(ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, + unsigned int ActualParallelism, + unsigned int Index, int Flags) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ImplicitTask( + "Implicit Task", "", ObserveState::generated, Endpoint, ParallelData, + TaskData, ActualParallelism, Index, Flags)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ImplicitTask( + "Implicit Task", "", ObserveState::generated, Endpoint, ParallelData, + TaskData, ActualParallelism, Index, Flags)); +} + +void OmptCallbackHandler::handleParallelBegin( + ompt_data_t *EncounteringTaskData, + const ompt_frame_t *EncounteringTaskFrame, ompt_data_t *ParallelData, + unsigned int RequestedParallelism, int Flags, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ParallelBegin( + "Parallel Begin", "", ObserveState::generated, 
RequestedParallelism)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ParallelBegin( + "Parallel Begin", "", ObserveState::generated, RequestedParallelism)); +} + +void OmptCallbackHandler::handleParallelEnd(ompt_data_t *ParallelData, + ompt_data_t *EncounteringTaskData, + int Flags, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::ParallelEnd("Parallel End", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::ParallelEnd("Parallel End", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleDeviceInitialize( + int DeviceNum, const char *Type, ompt_device_t *Device, + ompt_function_lookup_t LookupFn, const char *DocumentationStr) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceInitialize( + "Device Inititalize", "", ObserveState::generated, DeviceNum, Type, + Device, LookupFn, DocumentationStr)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceInitialize( + "Device Inititalize", "", ObserveState::generated, DeviceNum, Type, + Device, LookupFn, DocumentationStr)); +} + +void OmptCallbackHandler::handleDeviceFinalize(int DeviceNum) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceFinalize( + "Device Finalize", "", ObserveState::generated, DeviceNum)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceFinalize( + "Device Finalize", "", ObserveState::generated, DeviceNum)); +} + +void OmptCallbackHandler::handleTarget(ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_id_t TargetId, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Target("Target", "", ObserveState::generated, + Kind, Endpoint, DeviceNum, TaskData, + TargetId, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Target("Target", "", 
ObserveState::generated, + Kind, Endpoint, DeviceNum, TaskData, + TargetId, CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetEmi(ompt_target_t Kind, + ompt_scope_endpoint_t Endpoint, + int DeviceNum, ompt_data_t *TaskData, + ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetEmi( + "Target EMI", "", ObserveState::generated, Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, TargetData, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetEmi( + "Target EMI", "", ObserveState::generated, Kind, Endpoint, DeviceNum, + TaskData, TargetTaskData, TargetData, CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetSubmit(ompt_id_t TargetId, + ompt_id_t HostOpId, + unsigned int RequestedNumTeams) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetSubmit("Target Submit", "", + ObserveState::generated, TargetId, + HostOpId, RequestedNumTeams)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetSubmit("Target Submit", "", + ObserveState::generated, TargetId, + HostOpId, RequestedNumTeams)); +} + +void OmptCallbackHandler::handleTargetSubmitEmi( + ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetData, + ompt_id_t *HostOpId, unsigned int RequestedNumTeams) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetSubmitEmi( + "Target Submit EMI", "", ObserveState::generated, Endpoint, TargetData, + HostOpId, RequestedNumTeams)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetSubmitEmi( + "Target Submit EMI", "", ObserveState::generated, Endpoint, TargetData, + HostOpId, RequestedNumTeams)); +} + +void OmptCallbackHandler::handleTargetDataOp( + ompt_id_t TargetId, ompt_id_t HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA) { + if 
(RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetDataOp( + "Target Data Op", "", ObserveState::generated, TargetId, HostOpId, + OpType, SrcAddr, SrcDeviceNum, DstAddr, DstDeviceNum, Bytes, + CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetDataOp( + "Target Data Op", "", ObserveState::generated, TargetId, HostOpId, + OpType, SrcAddr, SrcDeviceNum, DstAddr, DstDeviceNum, Bytes, + CodeptrRA)); +} + +void OmptCallbackHandler::handleTargetDataOpEmi( + ompt_scope_endpoint_t Endpoint, ompt_data_t *TargetTaskData, + ompt_data_t *TargetData, ompt_id_t *HostOpId, ompt_target_data_op_t OpType, + void *SrcAddr, int SrcDeviceNum, void *DstAddr, int DstDeviceNum, + size_t Bytes, const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::TargetDataOpEmi( + "Target Data Op EMI", "", ObserveState::generated, Endpoint, + TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::TargetDataOpEmi( + "Target Data Op EMI", "", ObserveState::generated, Endpoint, + TargetTaskData, TargetData, HostOpId, OpType, SrcAddr, SrcDeviceNum, + DstAddr, DstDeviceNum, Bytes, CodeptrRA)); +} + +void OmptCallbackHandler::handleDeviceLoad(int DeviceNum, const char *Filename, + int64_t OffsetInFile, + void *VmaInFile, size_t Bytes, + void *HostAddr, void *DeviceAddr, + uint64_t ModuleId) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceLoad( + "Device Load", "", ObserveState::generated, DeviceNum, Filename, + OffsetInFile, VmaInFile, Bytes, HostAddr, DeviceAddr, ModuleId)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceLoad( + "Device Load", "", ObserveState::generated, DeviceNum, Filename, + OffsetInFile, VmaInFile, Bytes, HostAddr, DeviceAddr, ModuleId)); +} + +void OmptCallbackHandler::handleDeviceUnload(int DeviceNum, uint64_t 
ModuleId) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::DeviceUnload("Device Unload", "", + ObserveState::generated)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::DeviceUnload("Device Unload", "", + ObserveState::generated)); +} + +void OmptCallbackHandler::handleBufferRequest(int DeviceNum, + ompt_buffer_t **Buffer, + size_t *Bytes) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRequest("Buffer Request", "", + ObserveState::generated, + DeviceNum, Buffer, Bytes)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRequest("Buffer Request", "", + ObserveState::generated, DeviceNum, + Buffer, Bytes)); +} + +void OmptCallbackHandler::handleBufferComplete(int DeviceNum, + ompt_buffer_t *Buffer, + size_t Bytes, + ompt_buffer_cursor_t Begin, + int BufferOwned) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferComplete( + "Buffer Complete", "", ObserveState::generated, DeviceNum, Buffer, + Bytes, Begin, BufferOwned)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferComplete( + "Buffer Complete", "", ObserveState::generated, DeviceNum, Buffer, + Bytes, Begin, BufferOwned)); +} + +void OmptCallbackHandler::handleBufferRecord(ompt_record_ompt_t *Record) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRecord("Buffer Record", "", + ObserveState::generated, Record)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRecord("Buffer Record", "", + ObserveState::generated, Record)); +} + +void OmptCallbackHandler::handleBufferRecordDeallocation( + ompt_buffer_t *Buffer) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::BufferRecordDeallocation( + "Buffer Deallocation", "", ObserveState::generated, Buffer)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::BufferRecordDeallocation( + "Buffer Deallocation", "", ObserveState::generated, 
Buffer)); +} + +void OmptCallbackHandler::handleWork(ompt_work_t WorkType, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, uint64_t Count, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Work("Work", "", ObserveState::generated, + WorkType, Endpoint, ParallelData, + TaskData, Count, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Work("Work", "", ObserveState::generated, + WorkType, Endpoint, ParallelData, TaskData, + Count, CodeptrRA)); +} + +void OmptCallbackHandler::handleSyncRegion(ompt_sync_region_t Kind, + ompt_scope_endpoint_t Endpoint, + ompt_data_t *ParallelData, + ompt_data_t *TaskData, + const void *CodeptrRA) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::SyncRegion( + "SyncRegion", "", ObserveState::generated, Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::SyncRegion( + "SyncRegion", "", ObserveState::generated, Kind, Endpoint, ParallelData, + TaskData, CodeptrRA)); +} + +void OmptCallbackHandler::handleDispatch(ompt_data_t *ParallelData, + ompt_data_t *TaskData, + ompt_dispatch_t Kind, + ompt_data_t Instance) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::Dispatch("Dispatch", "", + ObserveState::generated, ParallelData, + TaskData, Kind, Instance)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::Dispatch("Dispatch", "", ObserveState::generated, + ParallelData, TaskData, Kind, + Instance)); +} + +void OmptCallbackHandler::handleAssertionSyncPoint( + const std::string &SyncPointName) { + if (RecordAndReplay) { + recordEvent(OmptAssertEvent::AssertionSyncPoint( + "Assertion SyncPoint", "", ObserveState::generated, SyncPointName)); + return; + } + + for (const auto &S : Subscribers) + S->notify(OmptAssertEvent::AssertionSyncPoint( + "Assertion SyncPoint", "", ObserveState::generated, 
SyncPointName)); +} + +void OmptCallbackHandler::recordEvent(OmptAssertEvent &&Event) { + RecordedEvents.emplace_back(std::forward(Event)); +} diff --git a/openmp/tools/omptest/src/OmptTester.cpp b/openmp/tools/omptest/src/OmptTester.cpp new file mode 100644 index 0000000000000..8813f7a7d3eaa --- /dev/null +++ b/openmp/tools/omptest/src/OmptTester.cpp @@ -0,0 +1,504 @@ +//===- OmptTester.cpp - ompTest OMPT tool implementation --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the core implementation file for the ompTest library. +/// It provides the actual OMPT tool implementation: registers callbacks, etc. +/// OMPT callbacks are passed to their corresponding handler, which in turn +/// notifies all registered asserters. 
+/// +//===----------------------------------------------------------------------===// + +#include "OmptTester.h" + +#include +#include +#include +#include + +using namespace omptest; + +// Callback handler, which receives and relays OMPT callbacks +extern OmptCallbackHandler *Handler; + +// EventListener, which actually prints the OMPT events +static OmptEventReporter *EventReporter; + +// From openmp/runtime/test/ompt/callback.h +#define register_ompt_callback_t(name, type) \ + do { \ + type f_##name = &on_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) + +#define OMPT_BUFFER_REQUEST_SIZE 256 + +#ifdef OPENMP_LIBOMPTEST_BUILD_STANDALONE +std::map<std::string, TestSuite> TestRegistrar::Tests; +#endif + +// Next ID handed out by the EMI callbacks (host op IDs, target data values) +static std::atomic<ompt_id_t> NextOpId{0x8000000000000001}; +static bool UseEMICallbacks = false; +static bool UseTracing = false; +static bool RunAsTestSuite = false; +static bool ColoredLog = false; + +// OMPT entry point handles +static ompt_set_trace_ompt_t ompt_set_trace_ompt = 0; +static ompt_start_trace_t ompt_start_trace = 0; +static ompt_flush_trace_t ompt_flush_trace = 0; +static ompt_stop_trace_t ompt_stop_trace = 0; +static ompt_get_record_ompt_t ompt_get_record_ompt = 0; +static ompt_advance_buffer_cursor_t ompt_advance_buffer_cursor = 0; +static ompt_get_record_type_t ompt_get_record_type_fn = 0; + +// OMPT device side tracing: Currently traced devices +typedef std::unordered_set<ompt_device_t *> OmptDeviceSetTy; +typedef std::unique_ptr<OmptDeviceSetTy> OmptDeviceSetPtrTy; +static OmptDeviceSetPtrTy TracedDevices; + +// OMPT callbacks + +// Trace record callbacks +static void on_ompt_callback_buffer_request(int device_num, + ompt_buffer_t **buffer, + size_t *bytes) { + *bytes = OMPT_BUFFER_REQUEST_SIZE; + *buffer = malloc(*bytes); + OmptCallbackHandler::get().handleBufferRequest(device_num, buffer, bytes); +} + +// Note: This
callback must handle a null begin cursor. Currently, +// ompt_get_record_ompt, print_record_ompt, and +// ompt_advance_buffer_cursor handle a null cursor. +// Relays the completed buffer to the handler, then walks its records and +// relays every non-null one; frees the buffer when buffer_owned is set. +static void on_ompt_callback_buffer_complete( + int device_num, ompt_buffer_t *buffer, + size_t bytes, /* bytes returned in this callback */ + ompt_buffer_cursor_t begin, int buffer_owned) { + OmptCallbackHandler::get().handleBufferComplete(device_num, buffer, bytes, + begin, buffer_owned); + + int Status = 1; + ompt_buffer_cursor_t CurrentPos = begin; + while (Status) { + ompt_record_ompt_t *Record = ompt_get_record_ompt(buffer, CurrentPos); + // ompt_get_record_type_fn may be null: the device initialize callback + // only warns when its lookup fails, so guard the call. + if (ompt_get_record_type_fn && + ompt_get_record_type_fn(buffer, CurrentPos) != ompt_record_ompt) { + printf("WARNING: received non-ompt type buffer object\n"); + } + // TODO: Sometimes it may happen that the retrieved record may be null?! + // Only handle non-null records + if (Record != nullptr) + OmptCallbackHandler::get().handleBufferRecord(Record); + Status = ompt_advance_buffer_cursor(/*device=*/NULL, buffer, bytes, + CurrentPos, &CurrentPos); + } + if (buffer_owned) { + OmptCallbackHandler::get().handleBufferRecordDeallocation(buffer); + free(buffer); + } +} + +static ompt_set_result_t set_trace_ompt(ompt_device_t *Device) { + if (!ompt_set_trace_ompt) + return ompt_set_error; + + if (UseEMICallbacks) { + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_emi); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_data_op_emi); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_submit_emi); + } else { + ompt_set_trace_ompt(Device, /*enable=*/1, /*etype=*/ompt_callback_target); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_data_op); + ompt_set_trace_ompt(Device, /*enable=*/1, + /*etype=*/ompt_callback_target_submit); + } + + return ompt_set_always; +} + +/////// HOST-RELATED ////// + +static void on_ompt_callback_thread_begin(ompt_thread_t thread_type, + ompt_data_t *thread_data) { +
OmptCallbackHandler::get().handleThreadBegin(thread_type, thread_data); +} + +static void on_ompt_callback_thread_end(ompt_data_t *thread_data) { + OmptCallbackHandler::get().handleThreadEnd(thread_data); +} + +static void on_ompt_callback_parallel_begin( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, + unsigned int requested_parallelism, int flags, const void *codeptr_ra) { + OmptCallbackHandler::get().handleParallelBegin( + encountering_task_data, encountering_task_frame, parallel_data, + requested_parallelism, flags, codeptr_ra); +} + +static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + int flags, const void *codeptr_ra) { + OmptCallbackHandler::get().handleParallelEnd( + parallel_data, encountering_task_data, flags, codeptr_ra); +} + +static void +on_ompt_callback_task_create(ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *new_task_data, int flags, + int has_dependences, const void *codeptr_ra) { + OmptCallbackHandler::get().handleTaskCreate( + encountering_task_data, encountering_task_frame, new_task_data, flags, + has_dependences, codeptr_ra); +} + +static void on_ompt_callback_task_schedule(ompt_data_t *prior_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *next_task_data) { + OmptCallbackHandler::get().handleTaskSchedule( + prior_task_data, prior_task_status, next_task_data); +} + +static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int actual_parallelism, + unsigned int index, int flags) { + OmptCallbackHandler::get().handleImplicitTask( + endpoint, parallel_data, task_data, actual_parallelism, index, flags); +} + +// Callbacks as of Table 19.4, which are not considered required for a minimal +// conforming OMPT implementation. 
+static void on_ompt_callback_work(ompt_work_t work_type, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, uint64_t count, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleWork(work_type, endpoint, parallel_data, + task_data, count, codeptr_ra); +} + +static void on_ompt_callback_dispatch(ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_dispatch_t kind, + ompt_data_t instance) { + OmptCallbackHandler::get().handleDispatch(parallel_data, task_data, kind, + instance); +} + +static void on_ompt_callback_sync_region(ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleSyncRegion(kind, endpoint, parallel_data, + task_data, codeptr_ra); +} + +/////// DEVICE-RELATED ////// + +// Synchronous callbacks +static void on_ompt_callback_device_initialize(int device_num, const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation) { + OmptCallbackHandler::get().handleDeviceInitialize(device_num, type, device, + lookup, documentation); + if (!UseTracing) + return; + + if (!lookup) { + printf("Trace collection disabled on device %d\n", device_num); + return; + } + + ompt_set_trace_ompt = (ompt_set_trace_ompt_t)lookup("ompt_set_trace_ompt"); + ompt_start_trace = (ompt_start_trace_t)lookup("ompt_start_trace"); + ompt_flush_trace = (ompt_flush_trace_t)lookup("ompt_flush_trace"); + ompt_stop_trace = (ompt_stop_trace_t)lookup("ompt_stop_trace"); + ompt_get_record_ompt = (ompt_get_record_ompt_t)lookup("ompt_get_record_ompt"); + ompt_advance_buffer_cursor = + (ompt_advance_buffer_cursor_t)lookup("ompt_advance_buffer_cursor"); + + ompt_get_record_type_fn = + (ompt_get_record_type_t)lookup("ompt_get_record_type"); + if (!ompt_get_record_type_fn) { + printf("WARNING: No function ompt_get_record_type found in device " + "callbacks\n"); + } + + static bool 
IsDeviceMapInitialized = false; + // Create the traced-device set only once, on the first invocation. + if (!IsDeviceMapInitialized) { + TracedDevices = std::make_unique<OmptDeviceSetTy>(); + IsDeviceMapInitialized = true; + } + + set_trace_ompt(device); + + // In many scenarios, this is a good place to start the + // trace. If start_trace is called from the main program before this + // callback is dispatched, the start_trace handle will be null. This + // is because this device_init callback is invoked during the first + // target construct implementation. + + start_trace(device); +} + +static void on_ompt_callback_device_finalize(int device_num) { + OmptCallbackHandler::get().handleDeviceFinalize(device_num); +} + +static void on_ompt_callback_device_load(int device_num, const char *filename, + int64_t offset_in_file, + void *vma_in_file, size_t bytes, + void *host_addr, void *device_addr, + uint64_t module_id) { + OmptCallbackHandler::get().handleDeviceLoad( + device_num, filename, offset_in_file, vma_in_file, bytes, host_addr, + device_addr, module_id); +} + +static void on_ompt_callback_device_unload(int device_num, uint64_t module_id) { + OmptCallbackHandler::get().handleDeviceUnload(device_num, module_id); +} + +static void on_ompt_callback_target_data_op( + ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, + void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, + size_t bytes, const void *codeptr_ra) { + OmptCallbackHandler::get().handleTargetDataOp( + target_id, host_op_id, optype, src_addr, src_device_num, dest_addr, + dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra) { + OmptCallbackHandler::get().handleTarget(kind, endpoint, device_num, task_data, + target_id, codeptr_ra); +} + +static void on_ompt_callback_target_submit(ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams) { +
OmptCallbackHandler::get().handleTargetSubmit(target_id, host_op_id, + requested_num_teams); +} + +static void on_ompt_callback_target_data_op_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_task_data, + ompt_data_t *target_data, ompt_id_t *host_op_id, + ompt_target_data_op_t optype, void *src_addr, int src_device_num, + void *dest_addr, int dest_device_num, size_t bytes, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + // Both src and dest must not be null + // However, for omp_target_alloc only the END call holds a value for one of + // the two entries + if (optype != ompt_target_data_alloc) + assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); + if (endpoint == ompt_scope_begin) + *host_op_id = NextOpId.fetch_add(1, std::memory_order_relaxed); + OmptCallbackHandler::get().handleTargetDataOpEmi( + endpoint, target_task_data, target_data, host_op_id, optype, src_addr, + src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra); +} + +static void on_ompt_callback_target_emi(ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, ompt_data_t *task_data, + ompt_data_t *target_task_data, + ompt_data_t *target_data, + const void *codeptr_ra) { + assert(codeptr_ra != 0 && "Unexpected null codeptr"); + if (endpoint == ompt_scope_begin) + target_data->value = NextOpId.fetch_add(1, std::memory_order_relaxed); + OmptCallbackHandler::get().handleTargetEmi(kind, endpoint, device_num, + task_data, target_task_data, + target_data, codeptr_ra); +} + +static void on_ompt_callback_target_submit_emi( + ompt_scope_endpoint_t endpoint, ompt_data_t *target_data, + ompt_id_t *host_op_id, unsigned int requested_num_teams) { + OmptCallbackHandler::get().handleTargetSubmitEmi( + endpoint, target_data, host_op_id, requested_num_teams); +} + +static void on_ompt_callback_target_map(ompt_id_t target_id, + unsigned int nitems, void **host_addr, + void **device_addr, size_t *bytes, + unsigned int 
*mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map callback is unimplemented"); +} + +static void on_ompt_callback_target_map_emi(ompt_data_t *target_data, + unsigned int nitems, + void **host_addr, + void **device_addr, size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra) { + assert(0 && "Target map emi callback is unimplemented"); +} + +/// Load the value of a given boolean environmental variable. +/// Returns true only if the variable is set to "1", "on", "true" or "yes" +/// (compared case-insensitively); returns false otherwise, including when +/// the variable is unset or \p VariableName is null. +bool getBoolEnvironmentVariable(const char *VariableName) { + if (VariableName == nullptr) + return false; + if (const char *EnvValue = std::getenv(VariableName)) { + std::string S{EnvValue}; + // Cast through unsigned char: passing a negative char to tolower is UB. + for (auto &C : S) + C = static_cast<char>(std::tolower(static_cast<unsigned char>(C))); + if (S == "1" || S == "on" || S == "true" || S == "yes") + return true; + } + return false; +} + +/// Called by the OMP runtime to initialize the OMPT +int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { + ompt_set_callback_t ompt_set_callback = nullptr; + ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback"); + if (!ompt_set_callback) + return 0; // failure + + UseEMICallbacks = getBoolEnvironmentVariable("OMPTEST_USE_OMPT_EMI"); + UseTracing = getBoolEnvironmentVariable("OMPTEST_USE_OMPT_TRACING"); + RunAsTestSuite = getBoolEnvironmentVariable("OMPTEST_RUN_AS_TESTSUITE"); + ColoredLog = getBoolEnvironmentVariable("OMPTEST_LOG_COLORED"); + + register_ompt_callback(ompt_callback_thread_begin); + register_ompt_callback(ompt_callback_thread_end); + register_ompt_callback(ompt_callback_parallel_begin); + register_ompt_callback(ompt_callback_parallel_end); + register_ompt_callback(ompt_callback_work); + // register_ompt_callback(ompt_callback_dispatch); + register_ompt_callback(ompt_callback_task_create); + // register_ompt_callback(ompt_callback_dependences); + // register_ompt_callback(ompt_callback_task_dependence); + register_ompt_callback(ompt_callback_task_schedule); + register_ompt_callback(ompt_callback_implicit_task); + //
register_ompt_callback(ompt_callback_masked); + register_ompt_callback(ompt_callback_sync_region); + // register_ompt_callback(ompt_callback_mutex_acquire); + // register_ompt_callback(ompt_callback_mutex); + // register_ompt_callback(ompt_callback_nestLock); + // register_ompt_callback(ompt_callback_flush); + // register_ompt_callback(ompt_callback_cancel); + register_ompt_callback(ompt_callback_device_initialize); + register_ompt_callback(ompt_callback_device_finalize); + register_ompt_callback(ompt_callback_device_load); + register_ompt_callback(ompt_callback_device_unload); + + if (UseEMICallbacks) { + register_ompt_callback(ompt_callback_target_emi); + register_ompt_callback(ompt_callback_target_submit_emi); + register_ompt_callback(ompt_callback_target_data_op_emi); + register_ompt_callback(ompt_callback_target_map_emi); + } else { + register_ompt_callback(ompt_callback_target); + register_ompt_callback(ompt_callback_target_submit); + register_ompt_callback(ompt_callback_target_data_op); + register_ompt_callback(ompt_callback_target_map); + } + + // Construct & subscribe the reporter, so it gets notified of events + EventReporter = new OmptEventReporter(); + OmptCallbackHandler::get().subscribe(EventReporter); + + if (RunAsTestSuite) + EventReporter->setActive(false); + + return 1; // success +} + +void ompt_finalize(ompt_data_t *tool_data) { + assert(Handler && "Callback handler should be present at this point"); + assert(EventReporter && "EventReporter should be present at this point"); + delete Handler; + delete EventReporter; +} + +#ifdef __cplusplus +extern "C" { +#endif +/// Called from the OMP Runtime to start / initialize the tool +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = { + &ompt_initialize, &ompt_finalize, {0}}; + return &ompt_start_tool_result; +} + +int start_trace(ompt_device_t *Device) { + if (!ompt_start_trace) + return 0; + + 
// Start tracing this device (insert into set) + assert(TracedDevices->find(Device) == TracedDevices->end() && + "Device already present in the map"); + TracedDevices->insert(Device); + + return ompt_start_trace(Device, &on_ompt_callback_buffer_request, + &on_ompt_callback_buffer_complete); +} + +int flush_trace(ompt_device_t *Device) { + if (!ompt_flush_trace) + return 0; + return ompt_flush_trace(Device); +} + +int flush_traced_devices() { + if (!ompt_flush_trace || TracedDevices == nullptr) + return 0; + + size_t NumFlushedDevices = 0; + for (auto Device : *TracedDevices) + if (ompt_flush_trace(Device) == 1) + ++NumFlushedDevices; + + // Provide time to process triggered assert events + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + + return (NumFlushedDevices == TracedDevices->size()); +} + +int stop_trace(ompt_device_t *Device) { + if (!ompt_stop_trace) + return 0; + + // Stop tracing this device (erase from set) + assert(TracedDevices->find(Device) != TracedDevices->end() && + "Device not present in the map"); + TracedDevices->erase(Device); + + return ompt_stop_trace(Device); +} + +// This is primarily used to stop unwanted prints from happening. +void libomptest_global_eventreporter_set_active(bool State) { + assert(EventReporter && "EventReporter should be present at this point"); + EventReporter->setActive(State); +} +#ifdef __cplusplus +} +#endif diff --git a/openmp/tools/omptest/src/OmptTesterStandalone.cpp b/openmp/tools/omptest/src/OmptTesterStandalone.cpp new file mode 100644 index 0000000000000..d4f68b4576536 --- /dev/null +++ b/openmp/tools/omptest/src/OmptTesterStandalone.cpp @@ -0,0 +1,147 @@ +//===- OmptTesterStandalone.cpp - Standalone unit testing impl. -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file represents the 'standalone' ompTest unit testing core +/// implementation, defining the general test suite and test case execution. +/// +//===----------------------------------------------------------------------===// + +#include "OmptTesterStandalone.h" +#include "OmptCallbackHandler.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace omptest; + +Error TestCase::exec() { + Error E; + E.Fail = false; + + if (IsDisabled) + return E; + + OmptCallbackHandler::get().subscribe(SequenceAsserter.get()); + OmptCallbackHandler::get().subscribe(SetAsserter.get()); + OmptCallbackHandler::get().subscribe(EventReporter.get()); + + execImpl(); + + // Actively flush potential in-flight trace records + flush_traced_devices(); + + // We remove subscribers to not be notified of events after our test case + // finished. 
+ OmptCallbackHandler::get().clearSubscribers(); + omptest::AssertState SequenceResultState = SequenceAsserter->checkState(); + omptest::AssertState SetResultState = SetAsserter->checkState(); + bool AnyFail = SequenceResultState == omptest::AssertState::fail || + SetResultState == omptest::AssertState::fail; + bool AllPass = SequenceResultState == omptest::AssertState::pass && + SetResultState == omptest::AssertState::pass; + if (ExpectedState == omptest::AssertState::pass && AnyFail) + E.Fail = true; + else if (ExpectedState == omptest::AssertState::fail && AllPass) + E.Fail = true; + if (AnyFail) + ResultState = omptest::AssertState::fail; + return E; +} + +TestSuite::TestSuite(TestSuite &&O) { + Name = O.Name; + TestCases.swap(O.TestCases); +} + +void TestSuite::setup() {} + +void TestSuite::teardown() {} + +TestSuite::TestCaseVec::iterator TestSuite::begin() { + return TestCases.begin(); +} + +TestSuite::TestCaseVec::iterator TestSuite::end() { return TestCases.end(); } + +TestRegistrar &TestRegistrar::get() { + static TestRegistrar TR; + return TR; +} + +std::vector TestRegistrar::getTestSuites() { + std::vector TSs; + for (auto &[k, v] : Tests) + TSs.emplace_back(std::move(v)); + return TSs; +} + +void TestRegistrar::addCaseToSuite(TestCase *TC, std::string TSName) { + auto &TS = Tests[TSName]; + if (TS.Name.empty()) + TS.Name = TSName; + TS.TestCases.emplace_back(TC); +} + +Registerer::Registerer(TestCase *TC, const std::string SuiteName) { + std::cout << "Adding " << TC->Name << " to " << SuiteName << std::endl; + TestRegistrar::get().addCaseToSuite(TC, SuiteName); +} + +int Runner::run() { + int ErrorCount = 0; + for (auto &TS : TestSuites) { + std::cout << "\n======\nExecuting for " << TS.Name << std::endl; + TS.setup(); + for (auto &TC : TS) { + std::cout << "\nExecuting " << TC->Name << std::endl; + if (Error Err = TC->exec()) { + reportError(Err); + abortOrKeepGoing(); + ++ErrorCount; + } + } + TS.teardown(); + } + printSummary(); + return ErrorCount; 
+} + +void Runner::reportError(const Error &Err) {} + +void Runner::abortOrKeepGoing() {} + +void Runner::printSummary() { + std::cout << "\n====== SUMMARY\n"; + for (auto &TS : TestSuites) { + std::cout << " - " << TS.Name; + for (auto &TC : TS) { + std::string Result; + if (TC->IsDisabled) { + Result = "-#-#-"; + } else if (TC->ResultState == TC->ExpectedState) { + if (TC->ResultState == omptest::AssertState::pass) + Result = "PASS"; + else if (TC->ResultState == omptest::AssertState::fail) + Result = "XFAIL"; + } else { + if (TC->ResultState == omptest::AssertState::fail) + Result = "FAIL"; + else if (TC->ResultState == omptest::AssertState::pass) + Result = "UPASS"; + } + std::cout << "\n " << std::setw(5) << Result << " : " << TC->Name; + } + std::cout << std::endl; + } +} diff --git a/openmp/tools/omptest/test/CMakeLists.txt b/openmp/tools/omptest/test/CMakeLists.txt new file mode 100644 index 0000000000000..427893313cc67 --- /dev/null +++ b/openmp/tools/omptest/test/CMakeLists.txt @@ -0,0 +1,28 @@ +##===----------------------------------------------------------------------===## +# +# Add ompTest unit tests to check-openmp. +# +##===----------------------------------------------------------------------===## + +# Target: ompTest library unit tests +file(GLOB UNITTEST_SOURCES "unittests/*.cpp") +add_executable(omptest-unittests ${UNITTEST_SOURCES}) + +# Add local and LLVM-provided GoogleTest include directories. 
+target_include_directories(omptest-unittests PRIVATE + ../include + ${LLVM_THIRD_PARTY_DIR}/unittest/googletest/include) + +target_link_libraries(omptest-unittests PRIVATE omptest) + +set_target_properties(omptest-unittests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + +# Add ompTest unit tests to check-openmp +add_openmp_testsuite(check-ompt-omptest "Running OMPT ompTest unit tests" + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omptest-unittests) + +# Configure the lit.site.cfg.in file +set(AUTO_GEN_COMMENT "## Autogenerated by OPENMP_TOOLS_OMPTEST_TEST " + "configuration.\n# Do not edit!") +configure_file(lit.site.cfg.in lit.site.cfg @ONLY) diff --git a/openmp/tools/omptest/test/lit.cfg b/openmp/tools/omptest/test/lit.cfg new file mode 100644 index 0000000000000..69c401aed83b8 --- /dev/null +++ b/openmp/tools/omptest/test/lit.cfg @@ -0,0 +1,26 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +# name: The name of this test suite. +config.name = 'OMPT ompTest' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = [''] + +# test_source_root: The root path where tests are located. 
+config.test_source_root = config.test_obj_root + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.test_obj_root + +# test format, match (omptest-)unittests +# Matched binaries (GoogleTests) are executed +config.test_format = lit.formats.GoogleTest(".", "unittests") diff --git a/openmp/tools/omptest/test/lit.site.cfg.in b/openmp/tools/omptest/test/lit.site.cfg.in new file mode 100644 index 0000000000000..4fa8c7e349681 --- /dev/null +++ b/openmp/tools/omptest/test/lit.site.cfg.in @@ -0,0 +1,9 @@ +@AUTO_GEN_COMMENT@ + +config.test_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config do the real work. +lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp b/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp new file mode 100644 index 0000000000000..34ceb7597b791 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/asserter-seq-test.cpp @@ -0,0 +1,358 @@ +#include "OmptAliases.h" +#include "OmptAsserter.h" +#include +#include + +#include "gtest/gtest.h" + +using namespace omptest; +using OAE = omptest::OmptAssertEvent; +using OS = omptest::ObserveState; + +/// SequencedAsserter test-fixture class to avoid code duplication among tests. 
+class OmptSequencedAsserterTest : public testing::Test { +protected: + OmptSequencedAsserterTest() { + // Construct default sequenced asserter + SeqAsserter = std::make_unique(); + + // Silence all potential log prints + SeqAsserter->getLog()->setLoggingLevel(logging::Level::SILENT); + } + + std::unique_ptr SeqAsserter; +}; + +TEST_F(OmptSequencedAsserterTest, DefaultState) { + // Assertion should neither start as 'deactivated' nor 'suspended' + ASSERT_EQ(SeqAsserter->isActive(), true); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + + // Assertion should begin with event ID zero + ASSERT_EQ(SeqAsserter->NextEvent, 0); + + // Assertion should begin without previous notifications or assertions + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + + // There should be no expected events + ASSERT_EQ(SeqAsserter->Events.empty(), true); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + + // Default mode should be strict + ASSERT_NE(SeqAsserter->getOperationMode(), AssertMode::relaxed); + ASSERT_EQ(SeqAsserter->getOperationMode(), AssertMode::strict); + + // Default state should be passing + ASSERT_NE(SeqAsserter->getState(), AssertState::fail); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); + ASSERT_NE(SeqAsserter->checkState(), AssertState::fail); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, IgnoreNotificationsWhenEmpty) { + // ParallelBegin events are suppressed by default + auto SuppressedEvent = OAE::ParallelBegin( + /*Name=*/"ParBegin", /*Group=*/"", /*Expected=*/OS::always, + /*NumThreads=*/3); + + // DeviceFinalize events are not ignored by default + auto IgnoredEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + + // Situation: There is nothing to assert. + // Result: All notifications are ignored. 
+ // Hence, check that the perceived count of notifications remains unchanged + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + SeqAsserter->notify(std::move(SuppressedEvent)); + + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + SeqAsserter->notify(std::move(IgnoredEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, IgnoreNotificationsWhileDeactivated) { + auto ExpectedEvent = OAE::DeviceUnload( + /*Name=*/"DevUnload", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Deactivate asserter, effectively ignoring notifications + SeqAsserter->setActive(false); + ASSERT_EQ(SeqAsserter->isActive(), false); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // DeviceFinalize events are not ignored by default + auto IgnoredEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->notify(std::move(IgnoredEvent)); + + // Assertion was deactivated: No change + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + + SeqAsserter->setActive(true); + ASSERT_EQ(SeqAsserter->isActive(), true); + + auto ObservedEvent = OAE::DeviceUnload( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->notify(std::move(ObservedEvent)); + + // Assertion was activated, one notification expected + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEvent) { + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + 
/*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // Sanity check: Notifications should not be triggered + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + // Adding an expected event must change the event count but not the state + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventIgnoreSuppressed) { + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // ParallelBegin events are suppressed by default + auto SuppressedEvent = OAE::ParallelBegin( + /*Name=*/"ParBegin", /*Group=*/"", /*Expected=*/OS::always, + /*NumThreads=*/3); + // Situation: There is one expected event and ParallelBegins are suppressed. + // Notification count remains unchanged for suppressed events + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + SeqAsserter->notify(std::move(SuppressedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObservePass) { + auto ExpectedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObserveFail) { + auto ExpectedEvent = 
OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + // Provide wrong DeviceNum + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/23); + + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + // Observed and expected event do not match: Fail + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, AddEventObserveDifferentType) { + auto ExpectedEvent = OAE::DeviceUnload( + /*Name=*/"DevUnload", /*Group=*/"", /*Expected=*/OS::always); + SeqAsserter->insert(std::move(ExpectedEvent)); + // DeviceFinalize events are not ignored by default + auto ObservedEvent = OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7); + + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + // Observed and expected event do not match: Fail + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckTargetGroupNoEffect) { + // Situation: Groups are designed to be used as an indicator -WITHIN- target + // regions. Hence, comparing two target regions w.r.t. their groups has no + // effect on pass or fail. 
+ + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Deactivate asserter, effectively ignoring notifications + SeqAsserter->setActive(false); + ASSERT_EQ(SeqAsserter->isActive(), false); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + + // Assertion was deactivated: No change + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Re-activate asserter + SeqAsserter->setActive(true); + ASSERT_EQ(SeqAsserter->isActive(), true); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + + // Actually observe a target event from "AnotherGroup" + auto AnotherObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"AnotherGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(AnotherObservedEvent)); + + // Observed all expected events; groups of target regions do not affect pass + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} + +TEST_F(OmptSequencedAsserterTest, CheckSyncPoint) { + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", 
/*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + SeqAsserter->notify(OAE::AssertionSyncPoint( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::always, + /*SyncPointName=*/"SyncPoint 1")); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + + // All events processed: SyncPoint "passes" + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + auto AnotherExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 0); + SeqAsserter->insert(std::move(AnotherExpectedEvent)); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Remaining events present: SyncPoint "fails" + SeqAsserter->notify(OAE::AssertionSyncPoint( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::always, + /*SyncPointName=*/"SyncPoint 2")); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckExcessNotify) { + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + 
/*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + ASSERT_EQ(SeqAsserter->getRemainingEventCount(), 1); + + // Target events are not ignored by default + auto ObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + SeqAsserter->notify(std::move(ObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + // All events processed: pass + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + // Target events are not ignored by default + auto AnotherObservedEvent = OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, /*DeviceNum=*/7, + /*TaskData=*/nullptr, /*TargetId=*/23, /*CodeptrRA=*/nullptr); + + // No more events expected: notify "fails" + SeqAsserter->notify(std::move(AnotherObservedEvent)); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::fail); +} + +TEST_F(OmptSequencedAsserterTest, CheckSuspend) { + SeqAsserter->insert(OAE::AssertionSuspend( + /*Name=*/"", /*Group=*/"", /*Expected=*/OS::never)); + ASSERT_EQ(SeqAsserter->Events.empty(), false); + + // Being notified while the next expected event is a "suspend" should change + // the asserter's state + ASSERT_EQ(SeqAsserter->getNotificationCount(), 0); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + SeqAsserter->notify(OAE::DeviceFinalize( + /*Name=*/"DevFini", /*Group=*/"", /*Expected=*/OS::always, + /*DeviceNum=*/7)); + ASSERT_EQ(SeqAsserter->AssertionSuspended, true); + ASSERT_EQ(SeqAsserter->getNotificationCount(), 1); + + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 0); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); + + auto ExpectedEvent = OAE::Target( + /*Name=*/"Target", 
/*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr); + SeqAsserter->insert(std::move(ExpectedEvent)); + + // Being notified with an observed event, which matches the next expected + // event, resumes assertion (suspended = false) + ASSERT_EQ(SeqAsserter->AssertionSuspended, true); + SeqAsserter->notify(OAE::Target( + /*Name=*/"Target", /*Group=*/"MyTargetGroup", /*Expected=*/OS::always, + /*Kind=*/TARGET, /*Endpoint=*/BEGIN, + /*DeviceNum=*/7, /*TaskData=*/nullptr, /*TargetId=*/23, + /*CodeptrRA=*/nullptr)); + ASSERT_EQ(SeqAsserter->AssertionSuspended, false); + + ASSERT_EQ(SeqAsserter->getNotificationCount(), 2); + ASSERT_EQ(SeqAsserter->getSuccessfulAssertionCount(), 1); + ASSERT_EQ(SeqAsserter->checkState(), AssertState::pass); +} diff --git a/openmp/tools/omptest/test/unittests/internal-event-test.cpp b/openmp/tools/omptest/test/unittests/internal-event-test.cpp new file mode 100644 index 0000000000000..a53025460c7e0 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/internal-event-test.cpp @@ -0,0 +1,530 @@ +#include "InternalEvent.h" +#include +#include + +#include "gtest/gtest.h" + +using namespace omptest; + +TEST(InternalEvent_toString, AssertionSyncPoint) { + internal::AssertionSyncPoint SP{/*Name=*/"Test Sync Point"}; + + EXPECT_EQ(SP.toString(), "Assertion SyncPoint: 'Test Sync Point'"); +} + +TEST(InternalEvent_toString, ThreadBegin) { + internal::ThreadBegin TB{/*ThreadType=*/ompt_thread_t::ompt_thread_initial}; + + EXPECT_EQ(TB.toString(), "OMPT Callback ThreadBegin: ThreadType=1"); +} + +TEST(InternalEvent_toString, ThreadEnd) { + internal::ThreadEnd TE{}; + + EXPECT_EQ(TE.toString(), "OMPT Callback ThreadEnd"); +} + +TEST(InternalEvent_toString, ParallelBegin) { + internal::ParallelBegin PB{/*NumThreads=*/31}; + + EXPECT_EQ(PB.toString(), "OMPT Callback ParallelBegin: NumThreads=31"); +} + +TEST(InternalEvent_toString, 
ParallelEnd) { + internal::ParallelEnd PE{/*ParallelData=*/(ompt_data_t *)0x11, + /*EncounteringTaskData=*/(ompt_data_t *)0x22, + /*Flags=*/31, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(PE.toString(), "OMPT Callback ParallelEnd"); +} + +TEST(InternalEvent_toString, Work) { + internal::Work WK{/*WorkType=*/ompt_work_t::ompt_work_loop_dynamic, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_beginend, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Count=*/31, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(WK.toString(), + "OMPT Callback Work: work_type=11 endpoint=3 parallel_data=0x11 " + "task_data=0x22 count=31 codeptr=0x33"); +} + +TEST(InternalEvent_toString, Dispatch_iteration) { + ompt_data_t DI{.value = 31}; + internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_iteration, + /*Instance=*/DI}; + + EXPECT_EQ(D.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=1 instance=[it=31]"); +} + +TEST(InternalEvent_toString, Dispatch_section) { + ompt_data_t DI{.ptr = (void *)0x33}; + internal::Dispatch D{/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_section, + /*Instance=*/DI}; + + EXPECT_EQ(D.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=2 instance=[ptr=0x33]"); +} + +TEST(InternalEvent_toString, Dispatch_chunks) { + ompt_dispatch_chunk_t DC{.start = 7, .iterations = 31}; + ompt_data_t DI{.ptr = (void *)&DC}; + + internal::Dispatch DLoop{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_ws_loop_chunk, + /*Instance=*/DI}; + + internal::Dispatch DTask{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_taskloop_chunk, + /*Instance=*/DI}; + + internal::Dispatch DDist{ + 
/*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_distribute_chunk, + /*Instance=*/DI}; + + ompt_data_t DINull{.ptr = nullptr}; + internal::Dispatch DDistNull{ + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*Kind=*/ompt_dispatch_t::ompt_dispatch_distribute_chunk, + /*Instance=*/DINull}; + + EXPECT_EQ(DLoop.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=3 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DTask.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=4 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DDist.toString(), + "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=5 instance=[chunk=(start=7, iterations=31)]"); + + EXPECT_EQ(DDistNull.toString(), "OMPT Callback Dispatch: parallel_data=0x11 " + "task_data=0x22 kind=5"); +} + +TEST(InternalEvent_toString, TaskCreate) { + internal::TaskCreate TC{/*EncounteringTaskData=*/(ompt_data_t *)0x11, + /*EncounteringTaskFrame=*/(const ompt_frame_t *)0x22, + /*NewTaskData=*/(ompt_data_t *)0x33, + /*Flags=*/7, + /*HasDependences=*/31, + /*CodeptrRA=*/(const void *)0x44}; + + EXPECT_EQ(TC.toString(), + "OMPT Callback TaskCreate: encountering_task_data=0x11 " + "encountering_task_frame=0x22 new_task_data=0x33 flags=7 " + "has_dependences=31 codeptr=0x44"); +} + +TEST(InternalEvent_toString, ImplicitTask) { + internal::ImplicitTask IT{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*ActualParallelism=*/7, + /*Index=*/31, + /*Flags=*/127}; + + EXPECT_EQ(IT.toString(), + "OMPT Callback ImplicitTask: endpoint=1 parallel_data=0x11 " + "task_data=0x22 actual_parallelism=7 index=31 flags=127"); +} + +TEST(InternalEvent_toString, SyncRegion) { + internal::SyncRegion SR{ + /*Kind=*/ompt_sync_region_t::ompt_sync_region_taskwait, + 
/*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_end, + /*ParallelData=*/(ompt_data_t *)0x11, + /*TaskData=*/(ompt_data_t *)0x22, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ(SR.toString(), "OMPT Callback SyncRegion: kind=5 endpoint=2 " + "parallel_data=0x11 task_data=0x22 codeptr=0x33"); +} + +TEST(InternalEvent_toString, Target) { + internal::Target T{/*Kind=*/ompt_target_t::ompt_target_enter_data_nowait, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_end, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)0x11, + /*TargetId=*/(ompt_id_t)31, + /*CodeptrRA=*/(const void *)0x22}; + + EXPECT_EQ(T.toString(), "Callback Target: target_id=31 kind=10 " + "endpoint=2 device_num=7 code=0x22"); +} + +TEST(InternalEvent_toString, TargetEmi) { + ompt_data_t TaskData{.value = 31}; + ompt_data_t TargetTaskData{.value = 127}; + ompt_data_t TargetData{.value = 8191}; + + internal::TargetEmi T{/*Kind=*/ompt_target_t::ompt_target_update, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)&TaskData, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*CodeptrRA=*/(const void *)0x11}; + + internal::TargetEmi TDataNull{ + /*Kind=*/ompt_target_t::ompt_target_update, + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*DeviceNum=*/7, + /*TaskData=*/(ompt_data_t *)&TaskData, + /*TargetTaskData=*/(ompt_data_t *)nullptr, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*CodeptrRA=*/(const void *)0x11}; + + std::ostringstream StreamT1; + std::ostringstream StreamT2; + std::string CallBackPrefix{ + "Callback Target EMI: kind=4 endpoint=1 device_num=7"}; + StreamT1 << CallBackPrefix << std::showbase << std::hex; + StreamT1 << " task_data=" << &TaskData << " (0x1f)"; + StreamT1 << " target_task_data=" << &TargetTaskData << " (0x7f)"; + StreamT1 << " target_data=" << &TargetData << " (0x1fff)"; + StreamT1 << " code=0x11"; + + StreamT2 << CallBackPrefix << std::showbase << std::hex; 
+ StreamT2 << " task_data=" << &TaskData << " (0x1f)"; + StreamT2 << " target_task_data=(nil) (0x0)"; + StreamT2 << " target_data=" << &TargetData << " (0x1fff)"; + StreamT2 << " code=0x11"; + + EXPECT_EQ(T.toString(), StreamT1.str()); + EXPECT_EQ(TDataNull.toString(), StreamT2.str()); +} + +TEST(InternalEvent_toString, TargetDataOp) { + internal::TargetDataOp TDO{ + /*TargetId=*/7, + /*HostOpId=*/31, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_associate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/127, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/8191, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + EXPECT_EQ( + TDO.toString(), + " Callback DataOp: target_id=7 host_op_id=31 optype=5 src=0x11 " + "src_device_num=127 dest=0x22 dest_device_num=8191 bytes=4096 code=0x33"); +} + +TEST(InternalEvent_toString, TargetDataOpEmi) { + ompt_data_t TargetTaskData{.value = 31}; + ompt_data_t TargetData{.value = 127}; + ompt_id_t HostOpId = 8191; + + internal::TargetDataOpEmi TDO{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)&HostOpId, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_disassociate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/1, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/2, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + // Set HostOpId=nullptr + internal::TargetDataOpEmi TDO_HostOpIdNull{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetTaskData=*/(ompt_data_t *)&TargetTaskData, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)nullptr, + /*OpType=*/ompt_target_data_op_t::ompt_target_data_disassociate, + /*SrcAddr=*/(void *)0x11, + /*SrcDeviceNum=*/1, + /*DstAddr=*/(void *)0x22, + /*DstDeviceNum=*/2, + /*Bytes=*/4096, + /*CodeptrRA=*/(const void *)0x33}; + + std::ostringstream StreamTDO1; + std::ostringstream StreamTDO2; + std::string 
CallBackPrefix{" Callback DataOp EMI: endpoint=1 optype=6"}; + std::string CallBackSuffix{ + " src=0x11 src_device_num=1 dest=0x22 dest_device_num=2 " + "bytes=4096 code=0x33"}; + StreamTDO1 << CallBackPrefix << std::showbase << std::hex; + StreamTDO1 << " target_task_data=" << &TargetTaskData << " (0x1f)"; + StreamTDO1 << " target_data=" << &TargetData << " (0x7f)"; + StreamTDO1 << " host_op_id=" << &HostOpId << " (0x1fff)"; + StreamTDO1 << CallBackSuffix; + + StreamTDO2 << CallBackPrefix << std::showbase << std::hex; + StreamTDO2 << " target_task_data=" << &TargetTaskData << " (0x1f)"; + StreamTDO2 << " target_data=" << &TargetData << " (0x7f)"; + StreamTDO2 << " host_op_id=(nil) (0x0)"; + StreamTDO2 << CallBackSuffix; + + EXPECT_EQ(TDO.toString(), StreamTDO1.str()); + EXPECT_EQ(TDO_HostOpIdNull.toString(), StreamTDO2.str()); +} + +TEST(InternalEvent_toString, TargetSubmit) { + internal::TargetSubmit TS{/*TargetId=*/7, + /*HostOpId=*/31, + /*RequestedNumTeams=*/127}; + + EXPECT_EQ(TS.toString(), + " Callback Submit: target_id=7 host_op_id=31 req_num_teams=127"); +} + +TEST(InternalEvent_toString, TargetSubmitEmi) { + ompt_data_t TargetData{.value = 127}; + ompt_id_t HostOpId = 8191; + internal::TargetSubmitEmi TS{ + /*Endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*TargetData=*/(ompt_data_t *)&TargetData, + /*HostOpId=*/(ompt_id_t *)&HostOpId, + /*RequestedNumTeams=*/7}; + + std::ostringstream StreamTS; + std::string CallBackPrefix{ + " Callback Submit EMI: endpoint=1 req_num_teams=7"}; + StreamTS << CallBackPrefix << std::showbase << std::hex; + StreamTS << " target_data=" << &TargetData << " (0x7f)"; + StreamTS << " host_op_id=" << &HostOpId << " (0x1fff)"; + + EXPECT_EQ(TS.toString(), StreamTS.str()); +} + +TEST(InternalEvent_toString, DeviceInitialize) { + const char *Type = "DeviceType"; + const char *DocStr = "DocumentationString"; + + internal::DeviceInitialize DI{/*DeviceNum=*/7, + /*Type=*/Type, + /*Device=*/(ompt_device_t *)0x11, + 
/*LookupFn=*/(ompt_function_lookup_t)0x22, + /*DocStr=*/DocStr}; + + internal::DeviceInitialize DINull{/*DeviceNum=*/0, + /*Type=*/nullptr, + /*Device=*/nullptr, + /*LookupFn=*/(ompt_function_lookup_t)0x0, + /*DocStr=*/nullptr}; + + std::ostringstream StreamDI; + std::string CallBackPrefix{"Callback Init: device_num=7 type=DeviceType " + "device=0x11 lookup=0x22 doc="}; + StreamDI << CallBackPrefix << std::showbase << std::hex; + StreamDI << (uint64_t)DocStr; + EXPECT_EQ(DI.toString(), StreamDI.str()); + + // TODO This looks inconsistent: (null) vs. (nil) + EXPECT_EQ(DINull.toString(), "Callback Init: device_num=0 type=(null) " + "device=(nil) lookup=(nil) doc=(nil)"); +} + +TEST(InternalEvent_toString, DeviceFinalize) { + internal::DeviceFinalize DF{/*DeviceNum=*/7}; + + EXPECT_EQ(DF.toString(), "Callback Fini: device_num=7"); +} + +TEST(InternalEvent_toString, DeviceLoad) { + const char *Filename = "FilenameToLoad"; + + internal::DeviceLoad DL{/*DeviceNum=*/7, + /*Filename=*/Filename, + /*OffsetInFile=*/31, + /*VmaInFile=*/(void *)0x11, + /*Bytes=*/127, + /*HostAddr=*/(void *)0x22, + /*DeviceAddr=*/(void *)0x33, + /*ModuleId=*/8191}; + + internal::DeviceLoad DLNull{/*DeviceNum=*/0, + /*Filename=*/nullptr, + /*OffsetInFile=*/0, + /*VmaInFile=*/nullptr, + /*Bytes=*/0, + /*HostAddr=*/nullptr, + /*DeviceAddr=*/nullptr, + /*ModuleId=*/0}; + + EXPECT_EQ( + DL.toString(), + "Callback Load: device_num:7 module_id:8191 " + "filename:FilenameToLoad host_adddr:0x22 device_addr:0x33 bytes:127"); + + // TODO This looks inconsistent: (null) vs. 
(nil) and ':' instead of '=' + EXPECT_EQ(DLNull.toString(), + "Callback Load: device_num:0 module_id:0 filename:(null) " + "host_adddr:(nil) device_addr:(nil) bytes:0"); +} + +TEST(InternalEvent_toString, BufferRequest) { + size_t Bytes = 7; + ompt_buffer_t *Buffer = (void *)0x11; + + internal::BufferRequest BR{/*DeviceNum=*/31, + /*Buffer=*/&Buffer, + /*Bytes=*/&Bytes}; + + internal::BufferRequest BRNull{/*DeviceNum=*/127, + /*Buffer=*/nullptr, + /*Bytes=*/nullptr}; + + EXPECT_EQ(BR.toString(), + "Allocated 7 bytes at 0x11 in buffer request callback"); + EXPECT_EQ(BRNull.toString(), + "Allocated 0 bytes at (nil) in buffer request callback"); +} + +TEST(InternalEvent_toString, BufferComplete) { + ompt_buffer_t *Buffer = (void *)0x11; + + internal::BufferComplete BC{/*DeviceNum=*/7, + /*Buffer=*/Buffer, + /*Bytes=*/127, + /*Begin=*/8191, + /*BufferOwned=*/1}; + + internal::BufferComplete BCNull{/*DeviceNum=*/0, + /*Buffer=*/nullptr, + /*Bytes=*/0, + /*Begin=*/0, + /*BufferOwned=*/0}; + + EXPECT_EQ(BC.toString(), + "Executing buffer complete callback: 7 0x11 127 0x1fff 1"); + EXPECT_EQ(BCNull.toString(), + "Executing buffer complete callback: 0 (nil) 0 (nil) 0"); +} + +TEST(InternalEvent_toString, BufferRecordInvalid) { + ompt_record_ompt_t InvalidRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_parallel_begin, + /*time=*/7, + /*thread_id=*/31, + /*target_id=*/127, + /*record=*/{.parallel_begin = {}}}; + + internal::BufferRecord BRNull{/*RecordPtr=*/nullptr}; + internal::BufferRecord BRInvalid{/*RecordPtr=*/&InvalidRecord}; + + std::ostringstream StreamBRInvalid; + StreamBRInvalid << "rec=" << std::showbase << std::hex << &InvalidRecord; + StreamBRInvalid << " type=3 (unsupported record type)"; + + EXPECT_EQ(BRNull.toString(), "rec=(nil) type=0 (unsupported record type)"); + EXPECT_EQ(BRInvalid.toString(), StreamBRInvalid.str()); +} + +TEST(InternalEvent_toString, BufferRecordTarget) { + ompt_record_target_t SubRecordTarget{ + 
/*kind=*/ompt_target_t::ompt_target_update, + /*endpoint=*/ompt_scope_endpoint_t::ompt_scope_begin, + /*device_num=*/2, + /*task_id=*/127, + /*target_id=*/31, + /*codeptr_ra=*/(const void *)0x11}; + + ompt_record_ompt_t TargetRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target, + /*time=*/7, + /*thread_id=*/29, + /*target_id=*/31, + /*record*/ {.target = SubRecordTarget}}; + + internal::BufferRecord BR{/*RecordPtr=*/&TargetRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &TargetRecord; + StreamBR << " type=8 (Target task) time=7 thread_id=29 target_id=31 kind=4"; + StreamBR << " endpoint=1 device=2 task_id=127 codeptr=0x11"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordDataOp) { + ompt_record_target_data_op_t SubRecordTargetDataOp{ + /*host_op_id=*/7, + /*optype=*/ompt_target_data_op_t::ompt_target_data_alloc_async, + /*src_addr=*/(void *)0x11, + /*src_device_num=*/1, + /*dest_addr=*/(void *)0x22, + /*dest_device_num=*/2, + /*bytes=*/127, + /*end_time=*/128, + /*codeptr_ra=*/(const void *)0x33, + }; + + ompt_record_ompt_t DataOpRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target_data_op_emi, + /*time=*/8, + /*thread_id=*/3, + /*target_id=*/5, + /*record=*/{.target_data_op = SubRecordTargetDataOp}}; + + internal::BufferRecord BR{/*RecordPtr=*/&DataOpRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &DataOpRecord; + StreamBR << " type=34 (Target data op) time=8 thread_id=3 target_id=5"; + StreamBR << " host_op_id=7 optype=17 src_addr=0x11 src_device=1"; + StreamBR << " dest_addr=0x22 dest_device=2 bytes=127 end_time=128"; + StreamBR << " duration=120 ns codeptr=0x33"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordKernel) { + ompt_record_target_kernel_t SubRecordTargetKernel{ + /*host_op_id=*/11, + /*requested_num_teams=*/127, + /*granted_num_teams=*/63, + 
/*end_time=*/8191, + }; + + ompt_record_ompt_t KernelRecord{ + /*type=*/ompt_callbacks_t::ompt_callback_target_submit_emi, + /*time=*/9, + /*thread_id=*/19, + /*target_id=*/33, + /*record=*/{.target_kernel = SubRecordTargetKernel}}; + + internal::BufferRecord BR{/*RecordPtr=*/&KernelRecord}; + + std::ostringstream StreamBR; + StreamBR << "rec=" << std::showbase << std::hex << &KernelRecord; + StreamBR << " type=35 (Target kernel) time=9 thread_id=19 target_id=33"; + StreamBR << " host_op_id=11 requested_num_teams=127 granted_num_teams=63"; + StreamBR << " end_time=8191 duration=8182 ns"; + + EXPECT_EQ(BR.toString(), StreamBR.str()); +} + +TEST(InternalEvent_toString, BufferRecordDeallocation) { + internal::BufferRecordDeallocation BRD{/*Buffer=*/(ompt_record_ompt_t *)0x11}; + internal::BufferRecordDeallocation BRDNull{/*Buffer=*/nullptr}; + + EXPECT_EQ(BRD.toString(), "Deallocated 0x11"); + EXPECT_EQ(BRDNull.toString(), "Deallocated (nil)"); +} diff --git a/openmp/tools/omptest/test/unittests/internal-util-test.cpp b/openmp/tools/omptest/test/unittests/internal-util-test.cpp new file mode 100644 index 0000000000000..6a9868b85c3a3 --- /dev/null +++ b/openmp/tools/omptest/test/unittests/internal-util-test.cpp @@ -0,0 +1,95 @@ +#include "InternalEvent.h" +#include + +#include "gtest/gtest.h" + +using namespace omptest; + +TEST(InternalUtility, ExpectedDefault_Integer) { + // int: -2147483648 (decimal) = 0x80000000 (hexadecimal) + EXPECT_EQ(expectedDefault(int), 0x80000000); + EXPECT_EQ(expectedDefault(int), (0x1 << 31)); + // int64_t: -9223372036854775808 (decimal) = 0x8000000000000000 (hexadecimal) + EXPECT_EQ(expectedDefault(int64_t), 0x8000000000000000); + EXPECT_EQ(expectedDefault(int64_t), (0x1L << 63)); +} + +TEST(InternalUtility, ExpectedDefault_Zero) { + // Expectedly zero + EXPECT_EQ(expectedDefault(size_t), 0); + EXPECT_EQ(expectedDefault(unsigned int), 0); + EXPECT_EQ(expectedDefault(ompt_id_t), 0); + EXPECT_EQ(expectedDefault(ompt_dispatch_t), 0); + 
EXPECT_EQ(expectedDefault(ompt_device_time_t), 0); +} + +TEST(InternalUtility, ExpectedDefault_Nullpointer) { + // Expectedly nullptr + EXPECT_EQ(expectedDefault(const char *), nullptr); + EXPECT_EQ(expectedDefault(const void *), nullptr); + EXPECT_EQ(expectedDefault(int *), nullptr); + EXPECT_EQ(expectedDefault(void *), nullptr); + EXPECT_EQ(expectedDefault(ompt_data_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_device_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_frame_t *), nullptr); + EXPECT_EQ(expectedDefault(ompt_function_lookup_t), nullptr); + EXPECT_EQ(expectedDefault(ompt_id_t *), nullptr); +} + +TEST(InternalUtility, MakeHexString_PointerValues) { + // IsPointer should only affect zero value + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/true), "(nil)"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false), "0x0"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true), "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/false), "0xff"); +} + +TEST(InternalUtility, MakeHexString_MinimumBytes) { + // Return a minimum length, based on the (minimum) requested bytes + EXPECT_EQ(util::makeHexString(15, /*IsPointer=*/true, /*MinBytes=*/0), "0xf"); + EXPECT_EQ(util::makeHexString(15, /*IsPointer=*/true, /*MinBytes=*/1), + "0x0f"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/0), + "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/1), + "0xff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/2), + "0x00ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/3), + "0x0000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/4), + "0x000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/5), + "0x00000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/6), + "0x0000000000ff"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/7), + "0x000000000000ff"); + 
EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/8), + "0x00000000000000ff"); + + // Default to four bytes, if request exceeds eight byte range + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, /*MinBytes=*/9), + "0x000000ff"); + + // Disregard requested minimum byte width, if actual value exceeds it + EXPECT_EQ(util::makeHexString(1024, /*IsPointer=*/true, /*MinBytes=*/1), + "0x400"); +} + +TEST(InternalUtility, MakeHexString_HexBase) { + // Cut off "0x" when requested + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/true, /*MinBytes=*/0, + /*ShowHexBase=*/false), + "(nil)"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false, /*MinBytes=*/0, + /*ShowHexBase=*/false), + "0"); + EXPECT_EQ(util::makeHexString(0, /*IsPointer=*/false, /*MinBytes=*/1, + /*ShowHexBase=*/false), + "00"); + EXPECT_EQ(util::makeHexString(255, /*IsPointer=*/true, + /*MinBytes=*/2, + /*ShowHexBase=*/false), + "00ff"); +} diff --git a/openmp/tools/omptest/test/unittests/main-test.cpp b/openmp/tools/omptest/test/unittests/main-test.cpp new file mode 100644 index 0000000000000..2eba663e49c8e --- /dev/null +++ b/openmp/tools/omptest/test/unittests/main-test.cpp @@ -0,0 +1,141 @@ +#include "OmptAssertEvent.h" +#include "OmptAsserter.h" +#include "OmptTester.h" +#include + +#include "gtest/gtest.h" + +using OS = omptest::ObserveState; +using OAE = omptest::OmptAssertEvent; + +TEST(CompareOperatorTests, ThreadBeginIdentity) { + auto TBInitial = + OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_initial); + auto TBWorker = OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_worker); + auto TBOther = OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_other); + auto TBUnknown = + OAE::ThreadBegin("dflt", "", OS::always, ompt_thread_unknown); + + ASSERT_EQ(TBInitial, TBInitial); + ASSERT_EQ(TBWorker, TBWorker); + ASSERT_EQ(TBOther, TBOther); + ASSERT_EQ(TBUnknown, TBUnknown); +} + +TEST(CompareOperatorTests, ThreadEndIdentity) { + auto TE = OAE::ThreadEnd("dflt", 
"", OS::always); + + ASSERT_EQ(TE, TE); +} + +TEST(CompareOperatorTests, ParallelBeginIdentity) { + auto PBNumT = OAE::ParallelBegin("thrdenable", "", OS::always, 3); + + ASSERT_EQ(PBNumT, PBNumT); +} + +TEST(CompareOperatorTests, ParallelEndIdentity) { + auto PEDflt = OAE::ParallelEnd("dflt", "", OS::always); + // TODO: Add cases with parallel data set, task data set, flags + + ASSERT_EQ(PEDflt, PEDflt); +} + +TEST(CompareOperatorTests, WorkIdentity) { + auto WDLoopBgn = + OAE::Work("loopbgn", "", OS::always, ompt_work_loop, ompt_scope_begin); + auto WDLoopEnd = + OAE::Work("loobend", "", OS::always, ompt_work_loop, ompt_scope_end); + + ASSERT_EQ(WDLoopBgn, WDLoopBgn); + ASSERT_EQ(WDLoopEnd, WDLoopEnd); + + auto WDSectionsBgn = OAE::Work("sectionsbgn", "", OS::always, + ompt_work_sections, ompt_scope_begin); + auto WDSectionsEnd = OAE::Work("sectionsend", "", OS::always, + ompt_work_sections, ompt_scope_end); + + // TODO: singleexecutor, single_other, workshare, distribute, taskloop, scope, + // loop_static, loop_dynamic, loop_guided, loop_other + + ASSERT_EQ(WDSectionsBgn, WDSectionsBgn); + ASSERT_EQ(WDSectionsEnd, WDSectionsEnd); +} + +TEST(CompareOperatorTests, DispatchIdentity) { + auto DIDflt = OAE::Dispatch("dflt", "", OS::always); + + ASSERT_EQ(DIDflt, DIDflt); +} + +TEST(CompareOperatorTests, TaskCreateIdentity) { + auto TCDflt = OAE::TaskCreate("dflt", "", OS::always); + + ASSERT_EQ(TCDflt, TCDflt); +} + +TEST(CompareOperatorTests, TaskScheduleIdentity) { + auto TS = OAE::TaskSchedule("dflt", "", OS::always); + + ASSERT_EQ(TS, TS); +} + +TEST(CompareOperatorTests, ImplicitTaskIdentity) { + auto ITDfltBgn = + OAE::ImplicitTask("dfltbgn", "", OS::always, ompt_scope_begin); + auto ITDfltEnd = OAE::ImplicitTask("dfltend", "", OS::always, ompt_scope_end); + + ASSERT_EQ(ITDfltBgn, ITDfltBgn); + ASSERT_EQ(ITDfltEnd, ITDfltEnd); +} + +TEST(CompareOperatorTests, SyncRegionIdentity) { + auto SRDfltBgn = + OAE::SyncRegion("srdfltbgn", "", OS::always, + 
ompt_sync_region_barrier_explicit, ompt_scope_begin); + auto SRDfltEnd = + OAE::SyncRegion("srdfltend", "", OS::always, + ompt_sync_region_barrier_explicit, ompt_scope_end); + + ASSERT_EQ(SRDfltBgn, SRDfltBgn); + ASSERT_EQ(SRDfltEnd, SRDfltEnd); +} + +TEST(CompareOperatorTests, TargetIdentity) { + auto TargetDfltBgn = + OAE::Target("dfltbgn", "", OS::always, ompt_target, ompt_scope_begin); + auto TargetDfltEnd = + OAE::Target("dfltend", "", OS::always, ompt_target, ompt_scope_end); + + ASSERT_EQ(TargetDfltBgn, TargetDfltBgn); + ASSERT_EQ(TargetDfltEnd, TargetDfltEnd); + + auto TargetDevBgn = OAE::Target("tgtdevbgn", "", OS::always, ompt_target, + ompt_scope_begin, 1); + auto TargetDevEnd = + OAE::Target("tgtdevend", "", OS::always, ompt_target, ompt_scope_end, 1); + + ASSERT_EQ(TargetDevBgn, TargetDevBgn); + ASSERT_EQ(TargetDevEnd, TargetDevEnd); +} + +TEST(CompareOperatorTests, BufferRecordIdentity) { + // Default, no time limit or anything + auto BRDflt = + OAE::BufferRecord("dflt", "", OS::always, ompt_callback_target_submit); + + // Minimum time set, no max time + auto BRMinSet = OAE::BufferRecord("minset", "", OS::always, + ompt_callback_target_submit, 10); + + // Minimum time and maximum time set + auto BRMinMaxSet = OAE::BufferRecord("minmaxset", "", OS::always, + ompt_callback_target_submit, {10, 100}); + + ASSERT_EQ(BRDflt, BRDflt); + ASSERT_EQ(BRMinSet, BRMinSet); + ASSERT_EQ(BRMinMaxSet, BRMinMaxSet); +} + +// Add main definition +OMPTEST_TESTSUITE_MAIN() From openmp-commits at lists.llvm.org Tue Jul 22 07:56:01 2025 From: openmp-commits at lists.llvm.org (via Openmp-commits) Date: Tue, 22 Jul 2025 07:56:01 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687fa681.170a0220.32fb4.abdb@mx.google.com> github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning:
You can test this locally with the following command: ``````````bash git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- clang/include/clang/Driver/Driver.h clang/include/clang/Driver/ToolChain.h clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChain.cpp clang/lib/Driver/ToolChains/Flang.cpp flang/include/flang/Frontend/CompilerInvocation.h flang/lib/Frontend/CompilerInvocation.cpp flang/tools/bbc/bbc.cpp ``````````
View the diff from clang-format here. ``````````diff diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 451521eb7..69365dd27 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -402,7 +402,6 @@ private: SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the ``````````
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Tue Jul 22 08:07:59 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 08:07:59 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687fa94f.630a0220.290657.911b@mx.google.com> ================ @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty ---------------- Meinersbur wrote: @klausler If building using a bootstrapping configuration (`-DLLVM_ENABLE_RUNTIMES=flang-rt`, implicitly added when `-DLLVM_ENABLE_PROJECTS=flang`), the mod files are automatically put into the Clang/Flang resource directory as a dependency of `check-flang`. If explicitly disabled using `-DFLANG_ENABLE_FLANG_RT=OFF` or in a Flang-standalone build (that currently does not support LLVM_ENABLE_RUNTIMES), Flang-RT can be compiled separately and `FLANG_INTRINSIC_MODULES_DIR` pointed to it. `lit.cfg.py` will add the path via `-fintrinsic-modules-path` to each `flang`/`%flang_fc1` call. If FLANG_INTRINSIC_MODULES_DIR is not set then tests in `test/` will be skipped. 
https://github.com/llvm/llvm-project/pull/137828 From openmp-commits at lists.llvm.org Tue Jul 22 08:12:54 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 08:12:54 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687faa76.a70a0220.12dd05.b499@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 01/15] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- .../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- 
flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename 
{flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. 
llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs 
must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. -get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformating of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests. 
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 02/15] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 03/15] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 04/15] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 05/15] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 06/15] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 07/15] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 08/15] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git 
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 8ec6d03939c3a7e2f79bb3ea5911b7607028dd7c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 21:52:02 +0200 Subject: [PATCH 09/15] Also add dependency barrier for WIN32 --- flang-rt/lib/runtime/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 1b8114c102205..20f5d84bb2b69 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -281,14 +281,18 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") - add_flangrt_library(${name}.intrinsics OBJECT + add_flangrt_library(${name}.intrinsics.obj OBJECT 
${intrinsics_sources} ) + add_custom_target(${name}.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) add_flangrt_library(${name} ${libtype} - ${sources} $ + ${sources} $ ${ARGN} - LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -311,8 +315,9 @@ else() endif () get_target_property(compile_target ${name}.compile ALIASED_TARGET) - flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) + add_dependencies(${compile_target} ${name}.intrinsics) add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") >From 580a0c56535a9cd1c65fd5a5e6309b73c36ae8b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:05:34 +0200 Subject: [PATCH 10/15] Dependency barrier test --- flang-rt/lib/runtime/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 20f5d84bb2b69..bdd4318832473 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,6 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics + COMMAND echo "Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) @@ -295,6 +296,7 @@ else() LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) + get_target_property(compile_target ${name}.compile ALIASED_TARGET) if (msvc_lib) set_target_properties(${name} @@ -311,14 +313,13 @@ else() set(is_public "") else () set(is_public PUBLIC) + add_dependencies(flang-rt-mod ${name}.intrinsics 
${compile_target}) set(_has_public_intrinsics "YES" PARENT_SCOPE) endif () - get_target_property(compile_target ${name}.compile ALIASED_TARGET) flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) add_dependencies(${compile_target} ${name}.intrinsics) - add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") >From 979691a5bba4888be8c7c82d1bed4e8cdc71fff9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:06:40 +0200 Subject: [PATCH 11/15] Dependency barrier info --- flang-rt/lib/runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index bdd4318832473..24b84b11f9513 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,7 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics - COMMAND echo "Dependency barrier" + COMMAND echo "${name} Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) >From b4adeab58e615b9059c0a6e5bbcb376d1fe21bb2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 21 Jul 2025 21:56:26 +0200 Subject: [PATCH 12/15] Avoid unrelated changes --- clang/include/clang/Driver/Driver.h | 1 + flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 9343fed36b6ac..14e1e644e51aa 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,6 +403,7 @@ class Driver { SmallString<128> &CrashDiagDir); public: + /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. 
/// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 688b5aacc4bcd..609ba27bc212b 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1376,10 +1376,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError( - loc, llvm::Twine("runtime derived type info descriptor of '") + name + - "' was not generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError(loc, + "runtime derived type info descriptor was not " + "generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition >From c40f43c98ff7ddbcb52c1ac35210320926369b2c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 14:16:19 +0200 Subject: [PATCH 13/15] Use -fintrinsic-modules-path= --- flang/test/lit.cfg.py | 20 +++----------------- runtimes/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index b05eba8da0b0c..bbf9a5a9f277f 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -147,26 +147,18 @@ def get_resource_module_intrinsic_dir(): intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] + intrinsics_search_args += [f"-fintrinsic-modules-path={flang_intrinsics_dir}"] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: extra_intrinsics_search_args 
+= [ - "-fintrinsic-modules-path", - config.flang_intrinsic_modules_dir, + f"-fintrinsic-modules-path={config.flang_intrinsic_modules_dir}", ] lit_config.note( f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" ) -config.substitutions.append( - ( - "%intrinsic_module_flags", - " ".join(intrinsics_search_args + extra_intrinsics_search_args), - ) -) - # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ @@ -193,13 +185,7 @@ def get_resource_module_intrinsic_dir(): "%bbc_bare", command=FindTool("bbc"), unresolved="fatal", - ), - ToolSubst( - "%flang_bare", - command=FindTool("flang"), - extra_args=isysroot_flag, - unresolved="fatal", - ), + ) ] # Flang has several unimplemented features. TODO messages are used to mark diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 51214a46f558e..0832767505fe1 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -454,7 +454,7 @@ function (flang_module_target tgtname) # Let it find the other public module files target_compile_options(${tgtname} PRIVATE - "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + "$<$:-fintrinsic-modules-path=${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" ) if (ARG_PUBLIC) >From 18422d4d6ea24aeb29a4fece11ba7b509fbd71d6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 16:50:58 +0200 Subject: [PATCH 14/15] Rework intrinsic-module-path.f90 test --- flang/test/Driver/Inputs/ieee_arithmetic.mod | 1 + flang/test/Driver/Inputs/iso_fortran_env.mod | 1 + flang/test/Driver/intrinsic-module-path.f90 | 54 ++++++++++++++++---- flang/test/lit.cfg.py | 2 +- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/flang/test/Driver/Inputs/ieee_arithmetic.mod b/flang/test/Driver/Inputs/ieee_arithmetic.mod index 30fd57801970b..451d1af62a941 100644 --- a/flang/test/Driver/Inputs/ieee_arithmetic.mod +++ b/flang/test/Driver/Inputs/ieee_arithmetic.mod @@ 
-1,5 +1,6 @@ ! DUMMY module ! Added for testing purposes. The contents of this file are currently not relevant. +! Using this file will cause an error because of a missing checksum module ieee_arithmetic type::ieee_round_type integer(1),private::mode=0_1 diff --git a/flang/test/Driver/Inputs/iso_fortran_env.mod b/flang/test/Driver/Inputs/iso_fortran_env.mod index 689297d52027b..ad501c2d9c1b8 100644 --- a/flang/test/Driver/Inputs/iso_fortran_env.mod +++ b/flang/test/Driver/Inputs/iso_fortran_env.mod @@ -1,5 +1,6 @@ ! DUMMY module ! Added for testing purposes. The contents of this file are currently not relevant. +! Using this file will cause an error because of a missing checksum module iso_fortran_env use __fortran_builtins,only:event_type=>__builtin_event_type use __fortran_builtins,only:lock_type=>__builtin_lock_type diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 3e696ce5d08e0..3317eb776f0a1 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -1,23 +1,55 @@ ! Ensure argument -fintrinsic-modules-path works as expected. -! WITHOUT the option, the default location for the module is checked and no error generated. -! With the option GIVEN, the module with the same name is PREPENDED, and considered over the -! default one, causing a CHECKSUM error. !----------------------------------------- -! FRONTEND FLANG DRIVER (flang -fc1) +! FLANG DRIVER !----------------------------------------- -! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty -! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN -! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path=%S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN +!
NOTE: Depending on how Flang is built, the default intrinsics may have higher +! or lower priority than -fintrinsic-modules-path added here. Using +! basictestmoduleone.mod from Inputs/module-dir/ will trigger an error. -! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found -! WITHOUT-NOT: 'iso_fortran_env.mod' was not found +! RUN: %flang -fsyntax-only -### %s 2>&1 | FileCheck %s --check-prefix=DEFAULTPATH -! GIVEN: error: Cannot use module file for module 'ieee_arithmetic': File has invalid checksum -! GIVEN: error: Cannot use module file for module 'iso_fortran_env': File has invalid checksum +! RUN: %flang -fsyntax-only -DINTRINSICS_DEFAULT %s +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTONE %s 2>&1 | FileCheck %s --check-prefix=NOINPUTONE +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTTWO %s 2>&1 | FileCheck %s --check-prefix=NOINPUTTWO +! RUN: %flang -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: %flang -fsyntax-only -DINTRINSICS_INPUTONE -fintrinsic-modules-path=%S/Inputs/ %s +! RUN: %flang -fsyntax-only -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/ -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir/ -fintrinsic-modules-path=%S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=WRONGINPUTONE +!----------------------------------------- +! FLANG FRONTEND (flang -fc1) +!----------------------------------------- +! NOTE: %flang_cc1 the default intrinsics path always has higher priority than +! -fintrinsic-modules-path added here. Accidentally using +! ieee_arithmetic/iso_fortran_env from the Inputs/ directory will trigger +! an error (e.g. when the default intrinsics dir is empty). + +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT %s +! 
RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE %s 2>&1 | FileCheck %s --check-prefix=NOINPUTONE +! RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO %s 2>&1 | FileCheck %s --check-prefix=NOINPUTTWO +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir %s +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -fintrinsic-modules-path=%S/Inputs/ %s +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/ -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir -fintrinsic-modules-path=%S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=WRONGINPUTONE + + +! DEFAULTPATH: flang{{.*}}-fc1{{.*}}-fintrinsic-modules-path + +! NOINPUTONE: Source file 'basictestmoduleone.mod' was not found +! NOINPUTTWO: Source file 'basictestmoduletwo.mod' was not found +! 
WRONGINPUTONE: 't1' not found in module 'basictestmoduleone' + program test_intrinsic_module_path +#ifdef INTRINSICS_DEFAULT use ieee_arithmetic, only: ieee_round_type use iso_fortran_env, only: team_type, event_type, lock_type +#endif +#ifdef INTRINSICS_INPUTONE + use basictestmoduleone, only: t1 +#endif +#ifdef INTRINSICS_INPUTTWO + use basictestmoduletwo, only: t2 +#endif end program diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index bbf9a5a9f277f..ba1e5d55b503c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -180,7 +180,7 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly + # Do not implicitly add intrinsic search paths ToolSubst( "%bbc_bare", command=FindTool("bbc"), >From 9143388ef4c1c2bc9b09595c69ed85eac1daf2c8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 17:12:30 +0200 Subject: [PATCH 15/15] Remove hint to reduce diff size --- clang/include/clang/Driver/Driver.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 451521eb70760..78a4c5738ae66 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -405,8 +405,6 @@ class Driver { /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. - /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the - /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, From openmp-commits at lists.llvm.org Tue Jul 22 08:32:43 2025 From: openmp-commits at lists.llvm.org (=?UTF-8?Q?Michael_Halkenh=C3=A4user?= via Openmp-commits) Date: Tue, 22 Jul 2025 08:32:43 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Add ompTest library to OpenMP (PR #147381) In-Reply-To: Message-ID: <687faf1b.170a0220.2ce749.6e26@mx.google.com> ================ @@ -0,0 +1,331 @@ +//===- InternalEvent.h - Internal event representation ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Declares internal event representations along the default CTOR definition. +/// +//===----------------------------------------------------------------------===// + +#ifndef OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H +#define OPENMP_TOOLS_OMPTEST_INCLUDE_INTERNALEVENT_H + +#include "InternalEventCommon.h" + +#include +#include +#include + +#define expectedDefault(TypeName) std::numeric_limits::min() + +namespace omptest { + +namespace util { + +/// String manipulation helper function. Takes up to 8 bytes of data and returns +/// their hexadecimal representation as string. The data can be expanded to the +/// given size in bytes and will by default be prefixed with '0x'. +std::string makeHexString(uint64_t Data, bool IsPointer = true, + size_t DataBytes = 0, bool ShowHexBase = true); + +} // namespace util + +namespace internal { +// clang-format off +event_class_w_custom_body(AssertionSyncPoint, \ ---------------- mhalk wrote: With the commit I've just pushed there are way less macros involved, a significant part traded in for templates. 
That way, we're also cutting down the amount of boilerplate / default implementations. https://github.com/llvm/llvm-project/pull/147381 From openmp-commits at lists.llvm.org Tue Jul 22 08:34:38 2025 From: openmp-commits at lists.llvm.org (Joseph Huber via Openmp-commits) Date: Tue, 22 Jul 2025 08:34:38 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OpenMP] Fix runtimes default build (PR #149871) In-Reply-To: Message-ID: <687faf8e.170a0220.31286c.7f81@mx.google.com> https://github.com/jhuber6 approved this pull request. https://github.com/llvm/llvm-project/pull/149871 From openmp-commits at lists.llvm.org Tue Jul 22 08:34:38 2025 From: openmp-commits at lists.llvm.org (Joseph Huber via Openmp-commits) Date: Tue, 22 Jul 2025 08:34:38 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OpenMP] Fix runtimes default build (PR #149871) In-Reply-To: Message-ID: <687faf8e.170a0220.3a4269.b6c7@mx.google.com> ================ @@ -92,6 +92,15 @@ include(LLVMCheckCompilerLinkerFlag) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrap build. 
+set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + + ---------------- jhuber6 wrote: ```suggestion ``` https://github.com/llvm/llvm-project/pull/149871 From openmp-commits at lists.llvm.org Tue Jul 22 08:36:23 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 08:36:23 -0700 (PDT) Subject: [Openmp-commits] [llvm] [openmp] [OpenMP] Fix runtimes default build (PR #149871) In-Reply-To: Message-ID: <687faff7.170a0220.124923.b50f@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/149871 >From dbfe5a8cfe456d0843fde0477a4dd8ed26e10e13 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 21 Jul 2025 18:54:26 +0200 Subject: [PATCH 1/2] [OpenMP] default build fix --- openmp/CMakeLists.txt | 2 +- openmp/runtime/src/CMakeLists.txt | 2 +- runtimes/CMakeLists.txt | 9 +++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..ab34851d8961c 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -115,7 +115,7 @@ option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." option(OPENMP_ENABLE_LIBOMP_PROFILING "Enable time profiling for libomp." OFF) # Header install location -if(${OPENMP_STANDALONE_BUILD}) +if(NOT LLVM_TREE_AVAILABLE) set(LIBOMP_HEADERS_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}") else() include(GetClangResourceDir) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..569061c6494b8 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -11,7 +11,7 @@ include(ExtendPath) # The generated headers will be placed in clang's resource directory if present. 
-if(OPENMP_STANDALONE_BUILD OR NOT LLVM_RUNTIMES_BUILD) +if(NOT LLVM_TREE_AVAILABLE) set(LIBOMP_HEADERS_INTDIR ${CMAKE_CURRENT_BINARY_DIR}) else() set(LIBOMP_HEADERS_INTDIR ${LLVM_BINARY_DIR}/${LIBOMP_HEADERS_INSTALL_PATH}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..051eb5991f6f6 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -92,6 +92,15 @@ include(LLVMCheckCompilerLinkerFlag) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrap build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior # is undesirable. Filtering CMAKE_{LANG}_IMPLICIT_INCLUDE_DIRECTORIES to remove >From 9fce817ded96f37f9ae7f0f774b1fcc31948ddae Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 17:36:15 +0200 Subject: [PATCH 2/2] Apply change suggested by @jhuber6 Co-authored-by: Joseph Huber --- runtimes/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 051eb5991f6f6..d0d2a538e1e85 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -100,7 +100,6 @@ if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSIO set(LLVM_TREE_AVAILABLE ON) endif() - # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior # is undesirable. 
Filtering CMAKE_{LANG}_IMPLICIT_INCLUDE_DIRECTORIES to remove From openmp-commits at lists.llvm.org Tue Jul 22 08:38:36 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 08:38:36 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Remove standalone build mode (PR #149878) In-Reply-To: Message-ID: <687fb07c.050a0220.224104.a3f6@mx.google.com> https://github.com/Meinersbur edited https://github.com/llvm/llvm-project/pull/149878 From openmp-commits at lists.llvm.org Tue Jul 22 08:40:08 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 08:40:08 -0700 (PDT) Subject: [Openmp-commits] [openmp] [OpenMP] Remove standalone build mode (PR #149878) In-Reply-To: Message-ID: <687fb0d8.170a0220.293a64.c0a6@mx.google.com> https://github.com/Meinersbur ready_for_review https://github.com/llvm/llvm-project/pull/149878 From openmp-commits at lists.llvm.org Tue Jul 22 11:03:45 2025 From: openmp-commits at lists.llvm.org (Michael Kruse via Openmp-commits) Date: Tue, 22 Jul 2025 11:03:45 -0700 (PDT) Subject: [Openmp-commits] [clang] [flang] [llvm] [openmp] [Flang][OpenMP] Move builtin .mod generation into runtimes (PR #137828) In-Reply-To: Message-ID: <687fd281.630a0220.31eae0.c8c2@mx.google.com> https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/137828 >From 839198d61f9937b5504d5e036c67266b4b84da8e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 14:09:57 +0200 Subject: [PATCH 01/16] [Flang][Flang-RT][OpenMP] Move builtin .mod generation into runtimes --- clang/include/clang/Driver/Driver.h | 3 +- clang/include/clang/Driver/Options.td | 2 +- clang/include/clang/Driver/ToolChain.h | 4 + clang/lib/Driver/Driver.cpp | 10 + clang/lib/Driver/ToolChain.cpp | 6 + clang/lib/Driver/ToolChains/Flang.cpp | 7 + .../Modules}/GetToolchainDirs.cmake | 11 ++ flang-rt/CMakeLists.txt | 73 ++------ flang-rt/cmake/modules/AddFlangRT.cmake | 14 +- 
.../cmake/modules/AddFlangRTOffload.cmake | 14 +- flang-rt/lib/runtime/CMakeLists.txt | 109 ++++++++++- .../lib/runtime}/__cuda_builtins.f90 | 0 .../lib/runtime}/__cuda_device.f90 | 0 .../lib/runtime}/__fortran_builtins.f90 | 4 +- .../runtime}/__fortran_ieee_exceptions.f90 | 0 .../lib/runtime}/__fortran_type_info.f90 | 7 +- .../lib/runtime}/__ppc_intrinsics.f90 | 0 .../lib/runtime}/__ppc_types.f90 | 0 .../lib/runtime}/cooperative_groups.f90 | 1 + .../lib/runtime}/cudadevice.f90 | 0 .../lib/runtime}/ieee_arithmetic.f90 | 29 ++- .../lib/runtime}/ieee_exceptions.f90 | 0 .../lib/runtime}/ieee_features.f90 | 0 .../lib/runtime}/iso_c_binding.f90 | 0 .../lib/runtime}/iso_fortran_env.f90 | 0 .../lib/runtime}/iso_fortran_env_impl.f90 | 0 .../module => flang-rt/lib/runtime}/mma.f90 | 0 flang-rt/test/lit.site.cfg.py.in | 2 +- flang-rt/unittests/CMakeLists.txt | 5 +- flang/CMakeLists.txt | 2 +- .../flang/Frontend/CompilerInvocation.h | 7 + flang/lib/Frontend/CompilerInvocation.cpp | 26 +-- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 +- flang/module/.clang-format | 1 - flang/test/CMakeLists.txt | 7 +- flang/test/Driver/intrinsic-module-path.f90 | 4 +- .../Lower/HLFIR/type-bound-call-mismatch.f90 | 2 +- flang/test/Lower/OpenMP/simd_aarch64.f90 | 7 +- .../target-enter-data-default-openmp52.f90 | 4 +- flang/test/lit.cfg.py | 57 +++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/CMakeLists.txt | 1 - flang/tools/bbc/bbc.cpp | 16 +- flang/tools/f18/CMakeLists.txt | 176 ------------------ flang/tools/f18/dump.cpp | 42 ----- llvm/runtimes/CMakeLists.txt | 21 +-- openmp/CMakeLists.txt | 4 + openmp/runtime/src/CMakeLists.txt | 62 ++---- runtimes/CMakeLists.txt | 163 +++++++++++++++- 49 files changed, 512 insertions(+), 400 deletions(-) rename {flang-rt/cmake/modules => cmake/Modules}/GetToolchainDirs.cmake (94%) rename {flang/module => flang-rt/lib/runtime}/__cuda_builtins.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__cuda_device.f90 (100%) rename {flang/module => 
flang-rt/lib/runtime}/__fortran_builtins.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__fortran_ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__fortran_type_info.f90 (98%) rename {flang/module => flang-rt/lib/runtime}/__ppc_intrinsics.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/__ppc_types.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/cooperative_groups.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/cudadevice.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_arithmetic.f90 (95%) rename {flang/module => flang-rt/lib/runtime}/ieee_exceptions.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/ieee_features.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_c_binding.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/iso_fortran_env_impl.f90 (100%) rename {flang/module => flang-rt/lib/runtime}/mma.f90 (100%) delete mode 100644 flang/module/.clang-format delete mode 100644 flang/tools/f18/CMakeLists.txt delete mode 100644 flang/tools/f18/dump.cpp diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index d9e328fe918bc..9343fed36b6ac 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,9 +403,10 @@ class Driver { SmallString<128> &CrashDiagDir); public: - /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. + /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the + /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a8c1b5dd8ab3b..ed0d0cda49ad7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5921,7 +5921,7 @@ def prebind : Flag<["-"], "prebind">; def preload : Flag<["-"], "preload">; def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">, HelpText<"Print the full library path of ">, MetaVarName<"">, - Visibility<[ClangOption, CLOption]>; + Visibility<[ClangOption, FlangOption, CLOption]>; def print_ivar_layout : Flag<["-"], "print-ivar-layout">, Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Objective-C Ivar layout bitmap print trace">, diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index b8899e78176b4..5cac39a8ed4e9 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -542,6 +542,10 @@ class ToolChain { // Returns Triple without the OSs version. llvm::Triple getTripleWithoutOSVersion() const; + /// Returns the target-specific path for Flang's intrinsic modules in the + /// resource directory if it exists. + std::optional getDefaultIntrinsicModuleDir() const; + // Returns the target specific runtime path if it exists. 
std::optional getRuntimePath() const; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135eecd401..28ae55b024c33 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6536,6 +6536,16 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { if (llvm::sys::fs::exists(Twine(P))) return std::string(P); + if (IsFlangMode()) { + if (std::optional IntrPath = + TC.getDefaultIntrinsicModuleDir()) { + SmallString<128> P(*IntrPath); + llvm::sys::path::append(P, Name); + if (llvm::sys::fs::exists(Twine(P))) + return std::string(P); + } + } + SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 3f9b808b2722e..ba20cda5e339b 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -983,6 +983,12 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { return {}; } +std::optional ToolChain::getDefaultIntrinsicModuleDir() const { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "finclude"); + return getTargetSubDirPath(P); +} + std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f7255eb..dba2f6fae493b 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -957,6 +957,13 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-resource-dir"); CmdArgs.push_back(D.ResourceDir.c_str()); + // Default intrinsic module dirs must be added after any user-provided + // -fintrinsic-modules-path to have lower precedence + if (auto IntrModPath = TC.getDefaultIntrinsicModuleDir()) { + CmdArgs.push_back("-fintrinsic-modules-path"); + CmdArgs.push_back(Args.MakeArgString(*IntrModPath)); + } + // Offloading 
related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/flang-rt/cmake/modules/GetToolchainDirs.cmake b/cmake/Modules/GetToolchainDirs.cmake similarity index 94% rename from flang-rt/cmake/modules/GetToolchainDirs.cmake rename to cmake/Modules/GetToolchainDirs.cmake index fba12502b5946..f32e1264cc373 100644 --- a/flang-rt/cmake/modules/GetToolchainDirs.cmake +++ b/cmake/Modules/GetToolchainDirs.cmake @@ -47,6 +47,17 @@ function (get_toolchain_library_subdir outvar) endfunction () +# Corresponds to Flang's ToolChain::getDefaultIntrinsicModuleDir(). +function (get_toolchain_module_subdir outvar) + set(outval "finclude") + + get_toolchain_arch_dirname(arch_dirname) + set(outval "${outval}/${arch_dirname}") + + set(${outvar} "${outval}" PARENT_SCOPE) +endfunction () + + # Corresponds to Clang's ToolChain::getOSLibName(). Adapted from Compiler-RT. function (get_toolchain_os_dirname outvar) if (ANDROID) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d048ac4b3e5a4..09f4f9e7213f1 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -67,68 +67,17 @@ include(GetToolchainDirs) include(FlangCommon) include(HandleCompilerRT) include(ExtendPath) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) ############################ # Build Mode Introspection # ############################ -# Determine whether we are in the runtimes/runtimes-bins directory of a -# bootstrap build. -set(LLVM_TREE_AVAILABLE OFF) -if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) - set(LLVM_TREE_AVAILABLE ON) -endif() - # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") -# Determine build and install paths. -# The build path is absolute, but the install dir is relative, CMake's install -# command has to apply CMAKE_INSTALL_PREFIX itself. 
-get_toolchain_library_subdir(toolchain_lib_subdir) -if (LLVM_TREE_AVAILABLE) - # In a bootstrap build emit the libraries into a default search path in the - # build directory of the just-built compiler. This allows using the - # just-built compiler without specifying paths to runtime libraries. - # - # Despite Clang in the name, get_clang_resource_dir does not depend on Clang - # being added to the build. Flang uses the same resource dir as clang. - include(GetClangResourceDir) - get_clang_resource_dir(FLANG_RT_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") - get_clang_resource_dir(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT) - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") -else () - # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be - # read-only and/or shared by multiple runtimes with different build - # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any - # non-toolchain library. - # For the install prefix, still use the resource dir assuming that Flang will - # be installed there using the same prefix. This is to not have a difference - # between bootstrap and standalone runtimes builds. 
- set(FLANG_RT_OUTPUT_RESOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}") - set(FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") - - extend_path(FLANG_RT_OUTPUT_RESOURCE_LIB_DIR "${FLANG_RT_OUTPUT_RESOURCE_DIR}" "lib${LLVM_LIBDIR_SUFFIX}") -endif () -set(FLANG_RT_INSTALL_RESOURCE_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH_DEFAULT}" - CACHE PATH "Path to install runtime libraries to (default: clang resource dir)") -extend_path(FLANG_RT_INSTALL_RESOURCE_LIB_PATH "${FLANG_RT_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_PATH) -# FIXME: For the libflang_rt.so, the toolchain resource lib dir is not a good -# destination because it is not a ld.so default search path. -# The machine where the executable is eventually executed may not be the -# machine where the Flang compiler and its resource dir is installed, so -# setting RPath by the driver is not an solution. It should belong into -# /usr/lib//libflang_rt.so, like e.g. libgcc_s.so. -# But the linker as invoked by the Flang driver also requires -# libflang_rt.so to be found when linking and the resource lib dir is -# the only reliable location. -cmake_path(NORMAL_PATH FLANG_RT_OUTPUT_RESOURCE_LIB_DIR) -cmake_path(NORMAL_PATH FLANG_RT_INSTALL_RESOURCE_LIB_PATH) - ################# # Build Options # @@ -243,6 +192,22 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +include(CheckFortranSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state(RESET) +set(CMAKE_REQUIRED_FLAGS "-ffree-form") +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +check_fortran_source_compiles([[ + subroutine test_quadmath + real(16) :: var1 + end + ]] + FORTRAN_SUPPORTS_REAL16 +) +cmake_pop_check_state() + +flang_module_fortran_enable() + # Search for clang_rt.builtins library. Need in addition to msvcrt. 
if (WIN32) find_compiler_rt_library(builtins FLANG_RT_BUILTINS_LIBRARY) @@ -338,4 +303,4 @@ if (FLANG_RT_INCLUDE_TESTS) add_subdirectory(unittests) else () add_custom_target(check-flang-rt) -endif() +endif() \ No newline at end of file diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake index e51590fdae3d3..febaac3058b3f 100644 --- a/flang-rt/cmake/modules/AddFlangRT.cmake +++ b/flang-rt/cmake/modules/AddFlangRT.cmake @@ -190,6 +190,12 @@ function (add_flangrt_library name) endif () endif () + if (build_object) + add_library(${name}.compile ALIAS "${name_object}") + else () + add_library(${name}.compile ALIAS "${default_target}") + endif () + foreach (tgtname IN LISTS libtargets) if (NOT WIN32) # Use same stem name for .a and .so. Common in UNIX environments. @@ -334,13 +340,13 @@ function (add_flangrt_library name) if (ARG_INSTALL_WITH_TOOLCHAIN) set_target_properties(${tgtname} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" - LIBRARY_OUTPUT_DIRECTORY "${FLANG_RT_OUTPUT_RESOURCE_LIB_DIR}" + ARCHIVE_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" + LIBRARY_OUTPUT_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_LIB_DIR}" ) install(TARGETS ${tgtname} - ARCHIVE DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" - LIBRARY DESTINATION "${FLANG_RT_INSTALL_RESOURCE_LIB_PATH}" + ARCHIVE DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" + LIBRARY DESTINATION "${RUNTIMES_INSTALL_RESOURCE_LIB_PATH}" ) endif () diff --git a/flang-rt/cmake/modules/AddFlangRTOffload.cmake b/flang-rt/cmake/modules/AddFlangRTOffload.cmake index cbc69f3a9656a..4a6f047a86af2 100644 --- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake +++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake @@ -88,16 +88,16 @@ macro(enable_omp_offload_compilation name files) "${FLANG_RT_DEVICE_ARCHITECTURES}" ) - set(OMP_COMPILE_OPTIONS + set(OMP_COMPILE_OPTIONS $<$: -fopenmp -fvisibility=hidden -fopenmp-cuda-mode 
--offload-arch=${compile_for_architectures} # Force LTO for the device part. -foffload-lto - ) - set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" + >) + set_property(SOURCE ${files} APPEND + PROPERTY COMPILE_DEFINITIONS ${OMP_COMPILE_OPTIONS} ) target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS}) @@ -105,6 +105,12 @@ macro(enable_omp_offload_compilation name files) set_source_files_properties(${files} PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD ) + + # If building flang-rt together with libomp, ensure that libomp is built first and found because -fopenmp will try to link it. + if (TARGET omp) + add_dependencies(${name} omp) + target_link_options(${name}.static PUBLIC "-L$") + endif () else() message(FATAL_ERROR "Flang-rt build with OpenMP offload is not supported for these compilers:\n" diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 332c0872e065f..1b8114c102205 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -73,7 +73,16 @@ set(supported_sources # List of source not used for GPU offloading. 
set(host_sources - ${FLANG_SOURCE_DIR}/module/iso_fortran_env_impl.f90 + __fortran_ieee_exceptions.f90 + __fortran_type_info.f90 + iso_fortran_env.f90 + ieee_arithmetic.f90 + ieee_exceptions.f90 + ieee_features.f90 + iso_c_binding.f90 + iso_fortran_env_impl.f90 + iso_fortran_env.f90 + command.cpp complex-powi.cpp complex-reduction.c @@ -90,6 +99,35 @@ set(host_sources unit-map.cpp ) +# Module sources that are required by other modules +set(intrinsics_sources + __fortran_builtins.f90 + __cuda_builtins.f90 +) + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "powerpc") + list(APPEND host_source + __ppc_types.f90 + __ppc_intrinsics.f90 + mma.f90 + ) +endif () + +list(APPEND supported_sources + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 +) + +# Compile as CUDA-Fortran, not directly supported by CMake +set_property(SOURCE + __cuda_device.f90 + cooperative_groups.f90 + cudadevice.f90 + APPEND PROPERTY + COMPILE_OPTIONS --offload-host-only -xcuda +) + # Sources that can be compiled directly for the GPU. 
set(gpu_sources ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp @@ -182,10 +220,42 @@ else () endif () +if (FORTRAN_SUPPORTS_REAL16) + add_compile_definitions(FLANG_SUPPORT_R16=1) + add_compile_options("$<$:-cpp>") +endif () + +add_compile_options( + "$<$:SHELL:-mmlir;SHELL:-ignore-missing-type-desc>" + + # Flang bug workaround: Reformatting of cooked token buffer causes identifier to be split between lines + "$<$:SHELL:-Xflang;SHELL:-fno-reformat>" +) + + +# check-flang depends on this to build intrinsic modules +if (NOT TARGET flang-rt-mod) + add_custom_target(flang-rt-mod) +endif () + if (NOT WIN32) + # CMake ignores intrinsic USE dependencies + # CMake has an option Fortran_BUILDING_INSTRINSIC_MODULES/Fortran_BUILDING_INTRINSIC_MODULES to disable this behavior, unfortunately it does not work with Ninja (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) + # As a workaround, we build those intrinsic modules first such that the main runtime can depend on it. + add_flangrt_library(flang_rt.intrinsics.obj OBJECT + ${intrinsics_sources} + ) + + # This barrier exists to force all of the intrinsic modules of flang_rt.intrinsics.obj to be built before anything that depends on it. + # Without it, CMake/Ninja seem to think that the modules of flang_rt.intrinsics.obj can be built concurrently to those in flang_rt.runtime. + add_custom_target(flang_rt.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(flang_rt.intrinsics flang_rt.intrinsics.obj) + add_flangrt_library(flang_rt.runtime STATIC SHARED - ${sources} - LINK_LIBRARIES ${Backtrace_LIBRARY} + ${sources} $ + LINK_LIBRARIES flang_rt.intrinsics.obj ${Backtrace_LIBRARY} INSTALL_WITH_TOOLCHAIN ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -196,6 +266,13 @@ if (NOT WIN32) # Select a default runtime, which is used for unit and regression tests.
get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET) add_library(flang_rt.runtime.unittest ALIAS "${default_target}") + + # Select a target that compiles the sources to build the public module files. + get_target_property(compile_target flang_rt.runtime.compile ALIASED_TARGET) + flang_module_target(flang_rt.intrinsics.obj PUBLIC) + flang_module_target(${compile_target} PUBLIC) + add_dependencies(${compile_target} flang_rt.intrinsics) + add_dependencies(flang-rt-mod flang_rt.intrinsics ${compile_target}) else() # Target for building all versions of the runtime add_custom_target(flang_rt.runtime) @@ -203,10 +280,15 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") + + add_flangrt_library(${name}.intrinsics OBJECT + ${intrinsics_sources} + ) + add_flangrt_library(${name} ${libtype} - ${sources} + ${sources} $ ${ARGN} - LINK_LIBRARIES ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -219,10 +301,19 @@ else() # Setting an unique Fortran_MODULE_DIRECTORY is required for each variant to # write a different .mod file. - set_target_properties(${name} - PROPERTIES - Fortran_MODULE_DIRECTORY "module.${suffix}" - ) + # One has to be selected to be the public module that is to be installed. + # We select the first. 
+ if (_has_public_intrinsics) + set(is_public "") + else () + set(is_public PUBLIC) + set(_has_public_intrinsics "YES" PARENT_SCOPE) + endif () + + get_target_property(compile_target ${name}.compile ALIASED_TARGET) + flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${compile_target} ${is_public}) + add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") diff --git a/flang/module/__cuda_builtins.f90 b/flang-rt/lib/runtime/__cuda_builtins.f90 similarity index 100% rename from flang/module/__cuda_builtins.f90 rename to flang-rt/lib/runtime/__cuda_builtins.f90 diff --git a/flang/module/__cuda_device.f90 b/flang-rt/lib/runtime/__cuda_device.f90 similarity index 100% rename from flang/module/__cuda_device.f90 rename to flang-rt/lib/runtime/__cuda_device.f90 diff --git a/flang/module/__fortran_builtins.f90 b/flang-rt/lib/runtime/__fortran_builtins.f90 similarity index 98% rename from flang/module/__fortran_builtins.f90 rename to flang-rt/lib/runtime/__fortran_builtins.f90 index 4d134fa4b62b1..d5e55b5d95020 100644 --- a/flang/module/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -6,13 +6,13 @@ ! !===------------------------------------------------------------------------===! -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' ! These naming shenanigans prevent names from Fortran intrinsic modules ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! 
Set PRIVATE by default to explicitly only export what is meant diff --git a/flang/module/__fortran_ieee_exceptions.f90 b/flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 similarity index 100% rename from flang/module/__fortran_ieee_exceptions.f90 rename to flang-rt/lib/runtime/__fortran_ieee_exceptions.f90 diff --git a/flang/module/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 similarity index 98% rename from flang/module/__fortran_type_info.f90 rename to flang-rt/lib/runtime/__fortran_type_info.f90 index 6af2a5a5e30ff..2f936c3787a61 100644 --- a/flang/module/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,8 +12,11 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info - - use, intrinsic :: __fortran_builtins, & +#if 0 + use __fortran_builtins, & +#else + use, intrinsic :: __fortran_builtins, & +#endif only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang/module/__ppc_intrinsics.f90 b/flang-rt/lib/runtime/__ppc_intrinsics.f90 similarity index 100% rename from flang/module/__ppc_intrinsics.f90 rename to flang-rt/lib/runtime/__ppc_intrinsics.f90 diff --git a/flang/module/__ppc_types.f90 b/flang-rt/lib/runtime/__ppc_types.f90 similarity index 100% rename from flang/module/__ppc_types.f90 rename to flang-rt/lib/runtime/__ppc_types.f90 diff --git a/flang/module/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 similarity index 95% rename from flang/module/cooperative_groups.f90 rename to flang-rt/lib/runtime/cooperative_groups.f90 index b8875f72f8079..82d1e0fe84042 100644 --- a/flang/module/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,6 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr +use :: cudadevice ! 
implicit dependency, make explicit for CMake implicit none diff --git a/flang/module/cudadevice.f90 b/flang-rt/lib/runtime/cudadevice.f90 similarity index 100% rename from flang/module/cudadevice.f90 rename to flang-rt/lib/runtime/cudadevice.f90 diff --git a/flang/module/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 similarity index 95% rename from flang/module/ieee_arithmetic.f90 rename to flang-rt/lib/runtime/ieee_arithmetic.f90 index 4e938a2daaa91..b3288a5cdd69f 100644 --- a/flang/module/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,13 +336,28 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L -#define IEEE_IS_FINITE_R(XKIND) \ - elemental logical function ieee_is_finite_a##XKIND(x); \ - real(XKIND), intent(in) :: x; \ - !dir$ ignore_tkr(d) x; \ - end function ieee_is_finite_a##XKIND; +! 
#define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite - SPECIFICS_R(IEEE_IS_FINITE_R) +elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a2; +elemental logical function ieee_is_finite_a3(x); real(3), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a3; + elemental logical function ieee_is_finite_a4(x); real(4), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a4; +elemental logical function ieee_is_finite_a8(x); real(8), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a8; +elemental logical function ieee_is_finite_a10(x); real(10), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a10; +#if FLANG_SUPPORT_R16 +elemental logical function ieee_is_finite_a16(x); real(16), intent(in) :: x; +!dir$ ignore_tkr(d) x; +end function ieee_is_finite_a16; +#endif end interface ieee_is_finite public :: ieee_is_finite #undef IEEE_IS_FINITE_R diff --git a/flang/module/ieee_exceptions.f90 b/flang-rt/lib/runtime/ieee_exceptions.f90 similarity index 100% rename from flang/module/ieee_exceptions.f90 rename to flang-rt/lib/runtime/ieee_exceptions.f90 diff --git a/flang/module/ieee_features.f90 b/flang-rt/lib/runtime/ieee_features.f90 similarity index 100% rename from flang/module/ieee_features.f90 rename to flang-rt/lib/runtime/ieee_features.f90 diff --git a/flang/module/iso_c_binding.f90 b/flang-rt/lib/runtime/iso_c_binding.f90 similarity index 100% rename from flang/module/iso_c_binding.f90 rename to flang-rt/lib/runtime/iso_c_binding.f90 diff --git a/flang/module/iso_fortran_env.f90 b/flang-rt/lib/runtime/iso_fortran_env.f90 similarity index 100% rename from flang/module/iso_fortran_env.f90 rename to flang-rt/lib/runtime/iso_fortran_env.f90 diff --git 
a/flang/module/iso_fortran_env_impl.f90 b/flang-rt/lib/runtime/iso_fortran_env_impl.f90 similarity index 100% rename from flang/module/iso_fortran_env_impl.f90 rename to flang-rt/lib/runtime/iso_fortran_env_impl.f90 diff --git a/flang/module/mma.f90 b/flang-rt/lib/runtime/mma.f90 similarity index 100% rename from flang/module/mma.f90 rename to flang-rt/lib/runtime/mma.f90 diff --git a/flang-rt/test/lit.site.cfg.py.in b/flang-rt/test/lit.site.cfg.py.in index 662d076b1fe24..0e9dc08b59925 100644 --- a/flang-rt/test/lit.site.cfg.py.in +++ b/flang-rt/test/lit.site.cfg.py.in @@ -6,7 +6,7 @@ config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.flang_source_dir = "@FLANG_SOURCE_DIR@" config.flang_rt_source_dir = "@FLANG_RT_SOURCE_DIR@" config.flang_rt_binary_test_dir = os.path.dirname(__file__) -config.flang_rt_output_resource_lib_dir = "@FLANG_RT_OUTPUT_RESOURCE_LIB_DIR@" +config.flang_rt_output_resource_lib_dir = "@RUNTIMES_OUTPUT_RESOURCE_LIB_DIR@" config.flang_rt_experimental_offload_support = "@FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT@" config.cc = "@CMAKE_C_COMPILER@" config.flang = "@CMAKE_Fortran_COMPILER@" diff --git a/flang-rt/unittests/CMakeLists.txt b/flang-rt/unittests/CMakeLists.txt index 5282196174134..82f32d027944e 100644 --- a/flang-rt/unittests/CMakeLists.txt +++ b/flang-rt/unittests/CMakeLists.txt @@ -53,9 +53,8 @@ function(add_flangrt_unittest_offload_properties target) # FIXME: replace 'native' in --offload-arch option with the list # of targets that Fortran Runtime was built for. 
if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP") - set_target_properties(${target} - PROPERTIES LINK_OPTIONS - "-fopenmp;--offload-arch=native" + set_property(TARGET ${target} APPEND + PROPERTY LINK_OPTIONS -fopenmp --offload-arch=native ) endif() endfunction() diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 0bfada476348a..5bdb43e5ab5e6 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -273,7 +273,7 @@ set(FLANG_TOOLS_INSTALL_DIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "Path for binary subdirectory (defaults to '${CMAKE_INSTALL_BINDIR}')") mark_as_advanced(FLANG_TOOLS_INSTALL_DIR) -set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_BINARY_DIR}/include/flang) +set(FLANG_INTRINSIC_MODULES_DIR "" CACHE PATH "Additional search path for modules; needed for running all tests if not building flang-rt in a bootstrapping build") set(FLANG_INCLUDE_DIR ${FLANG_BINARY_DIR}/include) # TODO: Remove when libclangDriver is lifted out of Clang diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index d294955af780e..feaee28a53349 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -92,6 +92,10 @@ class CompilerInvocation : public CompilerInvocationBase { // intrinsic of iso_fortran_env. std::string allCompilerInvocOpts; + /// Location of the resource directory containing files specific to this + /// instance/version of Flang. + std::string resourceDir; + /// Semantic options // TODO: Merge with or translate to frontendOpts. We shouldn't need two sets // of options. 
@@ -177,6 +181,9 @@ class CompilerInvocation : public CompilerInvocationBase { getSemanticsCtx(Fortran::parser::AllCookedSources &allCookedSources, const llvm::TargetMachine &); + std::string &getResourceDir() { return resourceDir; } + const std::string &getResourceDir() const { return resourceDir; } + std::string &getModuleDir() { return moduleDir; } const std::string &getModuleDir() const { return moduleDir; } diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f55d866435997..bd0f0a381bbd3 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -884,16 +884,6 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, return diags.getNumErrors() == numErrorsBefore; } -// Generate the path to look for intrinsic modules -static std::string getIntrinsicDir(const char *argv) { - // TODO: Find a system independent API - llvm::SmallString<128> driverPath; - driverPath.assign(llvm::sys::fs::getMainExecutable(argv, nullptr)); - llvm::sys::path::remove_filename(driverPath); - driverPath.append("/../include/flang/"); - return std::string(driverPath); -} - // Generate the path to look for OpenMP headers static std::string getOpenMPHeadersDir(const char *argv) { llvm::SmallString<128> includePath; @@ -1509,6 +1499,14 @@ bool CompilerInvocation::createFromArgs( success = false; } + // User-specified or default resource dir + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_resource_dir)) + invoc.resourceDir = a->getValue(); + else + invoc.resourceDir = clang::driver::Driver::GetResourcesPath( + llvm::sys::fs::getMainExecutable(argv0, nullptr)); + // -flang-experimental-hlfir if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir) || args.hasArg(clang::driver::options::OPT_emit_hlfir)) { @@ -1759,9 +1757,11 @@ void CompilerInvocation::setFortranOpts() { preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), 
preprocessorOptions.searchDirectoriesFromIntrModPath.end()); - // Add the default intrinsic module directory - fortranOptions.intrinsicModuleDirectories.emplace_back( - getIntrinsicDir(getArgv0())); + // Add the ordered list of -fintrinsic-modules-path + fortranOptions.intrinsicModuleDirectories.insert( + fortranOptions.intrinsicModuleDirectories.end(), + preprocessorOptions.searchDirectoriesFromIntrModPath.begin(), + preprocessorOptions.searchDirectoriesFromIntrModPath.end()); // Add the directory supplied through -J/-module-dir to the list of search // directories diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 5ca53ee48955e..14f422a6098b2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1365,10 +1365,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError(loc, - "runtime derived type info descriptor was not " - "generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError( + loc, llvm::Twine("runtime derived type info descriptor of '") + name + + "' was not generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition diff --git a/flang/module/.clang-format b/flang/module/.clang-format deleted file mode 100644 index e3845288a2aec..0000000000000 --- a/flang/module/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 8520bec646971..6cc7abd5fe7dc 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -59,7 +59,6 @@ set(FLANG_TEST_PARAMS set(FLANG_TEST_DEPENDS flang - module_files 
fir-opt tco bbc @@ -97,8 +96,12 @@ if (LLVM_BUILD_EXAMPLES) ) endif () +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) + list(APPEND FLANG_TEST_DEPENDS "flang-rt-mod") # For intrinsic module files (in flang-rt/) +endif () + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) - list(APPEND FLANG_TEST_DEPENDS "libomp-mod") + list(APPEND FLANG_TEST_DEPENDS "libomp-mod") # For omplib.mod and omplib_kinds.mod (in openmp/) endif () add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 8fe486cf61c83..a6f0cb04453f4 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -6,8 +6,8 @@ !----------------------------------------- ! FRONTEND FLANG DRIVER (flang -fc1) !----------------------------------------- -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s --allow-empty --check-prefix=WITHOUT -! RUN: not %flang_fc1 -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=GIVEN +! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty +! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN ! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found ! WITHOUT-NOT: 'iso_fortran_env.mod' was not found diff --git a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 index 2e0c72ccfe048..29a9784b984b6 100644 --- a/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 +++ b/flang/test/Lower/HLFIR/type-bound-call-mismatch.f90 @@ -1,6 +1,6 @@ ! Test interface that lowering handles small interface mismatch with ! type bound procedures. -! RUN: bbc -emit-hlfir %s -o - -I nw | FileCheck %s +! 
RUN: %bbc_bare -emit-hlfir %s -o - -I nw | FileCheck %s module dispatch_mismatch type t diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 index 735237223bcb5..2e4136273c75b 100644 --- a/flang/test/Lower/OpenMP/simd_aarch64.f90 +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -1,6 +1,11 @@ -! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 ! The default alignment for AARCH64 is 0 so we do not emit aligned clause ! REQUIRES: aarch64-registered-target + +! Requires aarch64 iso_c_binding.mod which currently is only available if your host is also aarch64 +! FIXME: Make flang a cross-compiler +! UNSUPPORTED: true + ! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s subroutine simdloop_aligned_cptr(A) use iso_c_binding diff --git a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 index 0d4fd964b71ec..72b5fea2c171e 100644 --- a/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 +++ b/flang/test/Lower/OpenMP/target-enter-data-default-openmp52.f90 @@ -1,7 +1,7 @@ ! 
This test checks the lowering and application of default map types for the target enter/exit data constructs and map clauses -!RUN: %flang -fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 -!RUN: not %flang -fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-version=52 -o - %s | FileCheck %s --check-prefix=CHECK-52 +!RUN: not %flang_fc1 -emit-fir -fopenmp -fopenmp-version=51 -o - %s 2>&1| FileCheck %s --check-prefix=CHECK-51 module test real, allocatable :: A diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 7eb57670ac767..8a375fdf49b8b 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -127,19 +127,64 @@ if config.default_sysroot: config.available_features.add("default_sysroot") + +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +if not flang_exe: + lit_config.fatal(f"Could not identify flang executable") + +def get_resource_module_intrinsic_dir(): + # Determine the intrinsic module search path that is added by the driver. If + # skipping the driver using -fc1, we need to append the path manually. 
+ flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + if not flang_intrinsics_dir: + return None + flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) + return flang_intrinsics_dir + +intrinsics_search_args = [] +if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): + intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") + +extra_intrinsics_search_args = [] +if config.flang_intrinsic_modules_dir: + extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] + lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + +config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ + ToolSubst( + "bbc", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), ToolSubst( "%flang", command=FindTool("flang"), - extra_args=isysroot_flag, + extra_args=isysroot_flag + extra_intrinsics_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", command=FindTool("flang"), - extra_args=["-fc1"], + extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", + ), + + # For not having intrinsic search paths to be added implicitly + ToolSubst( + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", + ), + ToolSubst( + "%flang_bare", + command=FindTool("flang"), + extra_args=isysroot_flag, unresolved="fatal", ), ] @@ -177,6 +222,14 @@ if result: config.environment["LIBPGMATH"] = True +# If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. 
+config.available_features.add("module-independent") +if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: + config.available_features.add("flangrt-modules") +else: + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") + # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" if config.have_openmp_rtl: diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index ae5144010bc8b..5d07af42c6487 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -23,6 +23,7 @@ config.linked_bye_extension = @LLVM_BYE_LINK_INTO_TOOLS@ config.osx_sysroot = path(r"@CMAKE_OSX_SYSROOT@") config.targets_to_build = "@TARGETS_TO_BUILD@" config.default_sysroot = "@DEFAULT_SYSROOT@" +config.have_flangrt_mod = ("flang-rt" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) config.have_openmp_rtl = ("@LLVM_TOOL_OPENMP_BUILD@" == "TRUE") or ("openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";")) if "openmp" in "@LLVM_ENABLE_RUNTIMES@".lower().split(";"): config.openmp_module_dir = "@CMAKE_BINARY_DIR@/runtimes/runtimes-bins/openmp/runtime/src" diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 1d2d2c608faf9..1b297af74cae7 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -7,7 +7,6 @@ #===------------------------------------------------------------------------===# add_subdirectory(bbc) -add_subdirectory(f18) add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index edfc878d17524..a98a94e32bee4 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -98,6 +98,11 @@ static llvm::cl::alias llvm::cl::desc("intrinsic module directory"), llvm::cl::aliasopt(intrinsicIncludeDirs)); +static llvm::cl::alias + 
intrinsicModulePath("fintrinsic-modules-path", + llvm::cl::desc("intrinsic module search paths"), + llvm::cl::aliasopt(intrinsicIncludeDirs)); + static llvm::cl::opt moduleDir("module", llvm::cl::desc("module output directory (default .)"), llvm::cl::init(".")); @@ -568,17 +573,8 @@ int main(int argc, char **argv) { ProgramName programPrefix; programPrefix = argv[0] + ": "s; - if (includeDirs.size() == 0) { + if (includeDirs.size() == 0) includeDirs.push_back("."); - // Default Fortran modules should be installed in include/flang (a sibling - // to the bin) directory. - intrinsicIncludeDirs.push_back( - llvm::sys::path::parent_path( - llvm::sys::path::parent_path( - llvm::sys::fs::getMainExecutable(argv[0], nullptr))) - .str() + - "/include/flang"); - } Fortran::parser::Options options; options.predefinitions.emplace_back("__flang__"s, "1"s); diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt deleted file mode 100644 index 546b6acaaf91d..0000000000000 --- a/flang/tools/f18/CMakeLists.txt +++ /dev/null @@ -1,176 +0,0 @@ -set(LLVM_LINK_COMPONENTS - FrontendOpenACC - FrontendOpenMP - Support - ) - -# Define the list of Fortran module files for which it is -# sufficient to generate the module file via -fsyntax-only. -set(MODULES - "__fortran_builtins" - "__fortran_ieee_exceptions" - "__fortran_type_info" - "__ppc_types" - "__ppc_intrinsics" - "mma" - "__cuda_builtins" - "__cuda_device" - "cooperative_groups" - "cudadevice" - "ieee_arithmetic" - "ieee_exceptions" - "ieee_features" - "iso_c_binding" - "iso_fortran_env" - "iso_fortran_env_impl" -) - -# Check if 128-bit float computations can be done via long double. 
-check_cxx_source_compiles( - "#include <float.h> - #if LDBL_MANT_DIG != 113 - #error LDBL_MANT_DIG != 113 - #endif - int main() { return 0; } - " - HAVE_LDBL_MANT_DIG_113) - -# Figure out whether we can support REAL(KIND=16) -if (FLANG_RUNTIME_F128_MATH_LIB) - set(FLANG_SUPPORT_R16 "1") -elseif (HAVE_LDBL_MANT_DIG_113) - set(FLANG_SUPPORT_R16 "1") -else() - set(FLANG_SUPPORT_R16 "0") -endif() - -# Init variable to hold extra object files coming from the Fortran modules; -# these module files will be contributed from the CMakeLists in flang/tools/f18. -set(module_objects "") - -# Create module files directly from the top-level module source directory. -# If CMAKE_CROSSCOMPILING, then the newly built flang executable was -# cross compiled, and thus can't be executed on the build system and thus -# can't be used for generating module files. -if (NOT CMAKE_CROSSCOMPILING) - foreach(filename ${MODULES}) - set(depends "") - set(opts "") - if(${filename} STREQUAL "__fortran_builtins" OR - ${filename} STREQUAL "__ppc_types") - elseif(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) - elseif(${filename} STREQUAL "__cuda_device" OR - ${filename} STREQUAL "cudadevice" OR - ${filename} STREQUAL "cooperative_groups") - set(opts -fc1 -xcuda) - if(${filename} STREQUAL "__cuda_device") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) - elseif(${filename} STREQUAL "cudadevice") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device.mod) - elseif(${filename} STREQUAL "cooperative_groups") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/cudadevice.mod) - endif() - else() - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) - if(${filename} STREQUAL "iso_fortran_env") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/iso_fortran_env_impl.mod) - endif() - if(${filename} STREQUAL "ieee_arithmetic" OR - ${filename} STREQUAL "ieee_exceptions") - set(depends ${depends} 
${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) - endif() - endif() - if(NOT ${filename} STREQUAL "__fortran_type_info" AND NOT ${filename} STREQUAL "__fortran_builtins") - set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) - endif() - - # The module contains PPC vector types that needs the PPC target. - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") - if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) - set(opts "--target=ppc64le") - else() - # Do not compile PPC module if the target is not available. - continue() - endif() - endif() - - set(decls "") - if (FLANG_SUPPORT_R16) - set(decls "-DFLANG_SUPPORT_R16") - endif() - - # Some modules have an implementation part that needs to be added to the - # flang_rt.runtime library. - set(compile_with "-fsyntax-only") - set(object_output "") - set(include_in_link FALSE) - - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support - add_custom_command(OUTPUT ${base}.mod ${object_output} - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang ${opts} ${decls} -cpp ${compile_with} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${FLANG_SOURCE_DIR}/module/${filename}.f90 - DEPENDS flang ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} - ) - list(APPEND MODULE_FILES ${base}.mod) - install(FILES ${base}.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - - # If a module has been compiled into an object file, add the file to - # the link line for the flang_rt.runtime library. - if(include_in_link) - list(APPEND module_objects ${object_output}) - endif() - endforeach() - - # Set a CACHE variable that is visible to the CMakeLists.txt in runtime/, so that - # the compiled Fortran modules can be added to the link line of the flang_rt.runtime - # library. 
- set(FORTRAN_MODULE_OBJECTS ${module_objects} CACHE INTERNAL "" FORCE) - - # Special case for omp_lib.mod, because its source comes from openmp/runtime/src/include. - # It also produces two module files: omp_lib.mod and omp_lib_kinds.mod. Compile these - # files only if OpenMP support has been configured. - if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.mod") - set(base ${FLANG_INTRINSIC_MODULES_DIR}/omp_lib) - add_custom_command(OUTPUT ${base}.mod ${base}_kinds.mod - COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} - ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 - DEPENDS flang ${FLANG_INTRINSIC_MODULES_DIR}/iso_c_binding.mod ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.F90 ${depends} - ) - add_custom_command(OUTPUT ${base}.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}.mod ${base}.f18.mod) - add_custom_command(OUTPUT ${base}_kinds.f18.mod - DEPENDS ${base}.mod - COMMAND ${CMAKE_COMMAND} -E copy ${base}_kinds.mod ${base}_kinds.f18.mod) - list(APPEND MODULE_FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod) - install(FILES ${base}.mod ${base}.f18.mod ${base}_kinds.mod ${base}_kinds.f18.mod DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang" COMPONENT flang-module-interfaces) - elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.mod is built there") - else() - message(WARNING "Not building omp_lib.mod, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") - endif() - add_llvm_install_targets(install-flang-module-interfaces - COMPONENT flang-module-interfaces) -endif() - -add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) -set_target_properties(module_files PROPERTIES FOLDER "Flang/Resources") - -# 
TODO Move this to a more suitable location -# Copy the generated omp_lib.h header file, if OpenMP support has been configured. -if (LLVM_TOOL_OPENMP_BUILD) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_PROJECTS, building omp_lib.h") - file(COPY ${CMAKE_BINARY_DIR}/projects/openmp/runtime/src/omp_lib.h DESTINATION "${CMAKE_BINARY_DIR}/include/flang/OpenMP/" FILE_PERMISSIONS OWNER_READ OWNER_WRITE) - install(FILES ${CMAKE_BINARY_DIR}/include/flang/OpenMP/omp_lib.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/flang/OpenMP") -elseif ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES) - message(STATUS "OpenMP runtime support enabled via LLVM_ENABLE_RUNTIMES, assuming omp_lib.h is built there") -else() - message(STATUS "Not copying omp_lib.h, no OpenMP runtime in either LLVM_ENABLE_PROJECTS or LLVM_ENABLE_RUNTIMES") -endif() diff --git a/flang/tools/f18/dump.cpp b/flang/tools/f18/dump.cpp deleted file mode 100644 index f11b5aedf4c6a..0000000000000 --- a/flang/tools/f18/dump.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===-- tools/f18/dump.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// This file defines Dump routines available for calling from the debugger. -// Each is based on operator<< for that type. There are overloadings for -// reference and pointer, and for dumping to a provided raw_ostream or errs(). 
- -#ifdef DEBUGF18 - -#include "llvm/Support/raw_ostream.h" - -#define DEFINE_DUMP(ns, name) \ - namespace ns { \ - class name; \ - llvm::raw_ostream &operator<<(llvm::raw_ostream &, const name &); \ - } \ - void Dump(llvm::raw_ostream &os, const ns::name &x) { os << x << '\n'; } \ - void Dump(llvm::raw_ostream &os, const ns::name *x) { \ - if (x == nullptr) \ - os << "null\n"; \ - else \ - Dump(os, *x); \ - } \ - void Dump(const ns::name &x) { Dump(llvm::errs(), x); } \ - void Dump(const ns::name *x) { Dump(llvm::errs(), *x); } - -namespace Fortran { -DEFINE_DUMP(parser, Name) -DEFINE_DUMP(parser, CharBlock) -DEFINE_DUMP(semantics, Symbol) -DEFINE_DUMP(semantics, Scope) -DEFINE_DUMP(semantics, IntrinsicTypeSpec) -DEFINE_DUMP(semantics, DerivedTypeSpec) -DEFINE_DUMP(semantics, DeclTypeSpec) -} // namespace Fortran - -#endif diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 94a43b96d2188..96391ed70133e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -252,6 +252,11 @@ function(runtime_default_target) # OpenMP tests list(APPEND extra_targets "libomp-mod") endif () + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + # The target flang-rt-mod is a dependee of check-flang needed to run its + # tests. + list(APPEND extra_targets "flang-rt-mod") + endif () if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_LIT_TESTSUITES "@${LLVM_BINARY_DIR}/runtimes/runtimes-bins/lit.tests") @@ -519,18 +524,12 @@ if(build_runtimes) endif() endforeach() endif() + + # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. 
+ if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) - if (${LLVM_TOOL_FLANG_BUILD}) - message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") - set(LIBOMP_FORTRAN_MODULES_COMPILER "${CMAKE_BINARY_DIR}/bin/flang") - set(LIBOMP_MODULES_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}/flang") - # TODO: This is a workaround until flang becomes a first-class project - # in llvm/CMakeList.txt. Until then, this line ensures that flang is - # built before "openmp" is built as a runtime project. Besides "flang" - # to build the compiler, we also need to add "module_files" to make sure - # that all .mod files are also properly build. - list(APPEND extra_deps "flang" "module_files") - endif() foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) list(APPEND extra_deps ${dep}) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index c206386fa6b61..3b64db92dcdea 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -100,6 +100,10 @@ set(OPENMP_TEST_FLAGS "" CACHE STRING set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") +if (LLVM_RUNTIMES_BUILD) + flang_module_fortran_enable() +endif () + set(ENABLE_LIBOMPTARGET ON) # Currently libomptarget cannot be compiled on Windows or MacOS X. 
# Since the device plugins are only supported on Linux anyway, diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 698e185d9c4dd..66acb3fd04136 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -368,45 +368,6 @@ endif() configure_file(${LIBOMP_INC_DIR}/omp_lib.h.var omp_lib.h @ONLY) configure_file(${LIBOMP_INC_DIR}/omp_lib.F90.var omp_lib.F90 @ONLY) -set(BUILD_FORTRAN_MODULES False) -if (NOT ${LIBOMP_FORTRAN_MODULES_COMPILER} STREQUAL "") - # If libomp is built as an LLVM runtime and the flang compiler is available, - # compile the Fortran module files. - message(STATUS "configuring openmp to build Fortran module files using ${LIBOMP_FORTRAN_MODULES_COMPILER}") - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${LIBOMP_FORTRAN_MODULES_COMPILER} -cpp -fsyntax-only ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set(BUILD_FORTRAN_MODULES True) -elseif(${LIBOMP_FORTRAN_MODULES}) - # The following requests explicit building of the Fortran module files - # Workaround for gfortran to build modules with the - # omp_sched_monotonic integer parameter - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - set(ADDITIONAL_Fortran_FLAGS "-fno-range-check") - endif() - add_custom_target(libomp-mod ALL DEPENDS omp_lib.mod omp_lib_kinds.mod) - set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Misc") - libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) - if(CMAKE_Fortran_COMPILER_SUPPORTS_F90) - set(LIBOMP_FORTRAN_SOURCE_FILE omp_lib.F90) - else() - message(FATAL_ERROR "Fortran module build requires Fortran 90 compiler") - endif() - add_custom_command( - OUTPUT omp_lib.mod omp_lib_kinds.mod - COMMAND ${CMAKE_Fortran_COMPILER} -c ${ADDITIONAL_Fortran_FLAGS} - 
${LIBOMP_CONFIGURED_FFLAGS} ${LIBOMP_FORTRAN_SOURCE_FILE} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_FORTRAN_SOURCE_FILE} - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES omp_lib${CMAKE_C_OUTPUT_EXTENSION}) - set(BUILD_FORTRAN_MODULES True) -endif() # Move files to exports/ directory if requested if(${LIBOMP_COPY_EXPORTS}) @@ -482,15 +443,32 @@ if(${LIBOMP_OMPT_SUPPORT}) install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endif() -if(${BUILD_FORTRAN_MODULES}) + + +# Build the modules files if a Fortran compiler is available +# only LLVM_ENABLE_RUNTIMES=openmp is supported, LLVM_ENABLE_PROJECTS=openmp has been deprecated. +if(LLVM_RUNTIMES_BUILD AND RUNTIMES_FLANG_MODULES_ENABLED) + # TODO: Consider including in LIBOMP_SOURCE_FILES instead + add_library(libomp-mod OBJECT + omp_lib.F90 + ) + set_target_properties(libomp-mod PROPERTIES FOLDER "OpenMP/Fortran Modules") + + libomp_get_fflags(LIBOMP_CONFIGURED_FFLAGS) + target_compile_options(libomp-mod PRIVATE ${LIBOMP_CONFIGURED_FFLAGS}) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_compile_options(libomp-mod PRIVATE -fno-range-check) + endif() + + flang_module_target(libomp-mod PUBLIC) + add_dependencies(libomp-mod flang-rt-mod) + set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) if (NOT ${LIBOMP_MODULES_INSTALL_PATH} STREQUAL "") set (destination ${LIBOMP_MODULES_INSTALL_PATH}) endif() install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.h - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib.mod - ${CMAKE_CURRENT_BINARY_DIR}/omp_lib_kinds.mod DESTINATION ${destination} ) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index e4dd4ebfc678d..2dcc68b80b07c 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -91,6 +91,16 @@ include(CheckLibraryExists) include(LLVMCheckCompilerLinkerFlag) 
include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) +include(ExtendPath) + + +# Determine whether we are in the runtimes/runtimes-bins directory of a +# bootstrapping build. +set(LLVM_TREE_AVAILABLE OFF) +if (LLVM_LIBRARY_OUTPUT_INTDIR AND LLVM_RUNTIME_OUTPUT_INTDIR AND PACKAGE_VERSION) + set(LLVM_TREE_AVAILABLE ON) +endif() + # CMake omits default compiler include paths, but in runtimes build, we use # -nostdinc and -nostdinc++ and control include paths manually so this behavior @@ -98,6 +108,7 @@ include(CheckCXXCompilerFlag) # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. +# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -276,6 +287,156 @@ if(LLVM_INCLUDE_TESTS) umbrella_lit_testsuite_begin(check-runtimes) endif() +# Determine paths for files that belong into the Clang/Flang resource dir. +if (LLVM_TREE_AVAILABLE) + # In a bootstrap build emit the libraries into a default search path in the + # build directory of the just-built compiler. This allows using the + # just-built compiler without specifying paths to runtime libraries. + # + # Despite Clang in the name, get_clang_resource_dir does not depend on Clang + # being added to the build. Flang uses the same resource dir as clang. + include(GetClangResourceDir) + get_clang_resource_dir(RUNTIMES_OUTPUT_RESOURCE_DIR PREFIX "${LLVM_LIBRARY_OUTPUT_INTDIR}/..") + get_clang_resource_dir(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT) +else () + # In a standalone runtimes build, do not write into LLVM_BINARY_DIR. It may be + # read-only and/or shared by multiple runtimes with different build + # configurations (e.g. Debug/Release). Use the runtime's own lib dir like any + # non-toolchain library. 
+ # For the install prefix, still use the resource dir assuming that Flang will + # be installed there using the same prefix. This is to not have a difference + # between bootstrap and standalone runtimes builds. + set(RUNTIMES_OUTPUT_RESOURCE_DIR "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") + set(RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT "lib${LLVM_LIBDIR_SUFFIX}/clang/${LLVM_VERSION_MAJOR}") +endif () + +# Determine build and install paths. +# The build path is absolute, but the install dir is relative, CMake's install +# command has to apply CMAKE_INSTALL_PREFIX itself. +# FIXME: For shared libraries, the toolchain resource lib dir is not a good +# destination because it is not a ld.so default search path. +# The machine where the executable is eventually executed may not be the +# machine where the Flang compiler and its resource dir is installed, so +# setting RPath by the driver is not a solution. It should belong into +# /usr/lib/<triple>/lib<name>.so, like e.g. libgcc_s.so. +# But the linker as invoked by the Flang driver also requires +# libflang_rt.so to be found when linking and the resource lib dir is +# the only reliable location. 
+include(GetToolchainDirs) +get_toolchain_library_subdir(toolchain_lib_subdir) +extend_path(RUNTIMES_OUTPUT_RESOURCE_LIB_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_lib_subdir}") + +set(RUNTIMES_INSTALL_RESOURCE_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH_DEFAULT}" CACHE PATH "Path to install headers, runtime libraries, and Fortran modules to (default: Clang resource dir)") +extend_path(RUNTIMES_INSTALL_RESOURCE_LIB_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_lib_subdir}") + +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_PATH) +cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) +cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) + + +# Enable building Fortran modules +# * Set up the Fortran compiler +# * Determine files location in the Clang/Flang resource dir +# * Install module files +macro (flang_module_fortran_enable) + include(CheckLanguage) + check_language(Fortran) + if(NOT CMAKE_Fortran_COMPILER) + message(STATUS "Not compiling Flang modules: Fortran not enabled") + return () + endif () + enable_language(Fortran) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + message(STATUS "Compiling Flang modules") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + else () + # TODO: Introspection of whether intrinsic modules are already available, so the openmp modules can be built without flang-rt in LLVM_ENABLE_RUNTIMES + message(STATUS "Not compiling Flang modules: flang-rt not available") + endif () + else () + message(STATUS "Compiling modules for non-Flang compiler (${CMAKE_Fortran_COMPILER_ID})") + set(RUNTIMES_FLANG_MODULES_ENABLED ON) + endif () + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + get_toolchain_module_subdir(toolchain_mod_subdir) + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "${toolchain_mod_subdir}") + 
extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "${toolchain_mod_subdir}") + else () + # For non-Flang compilers, avoid the risk of Flang accidentally picking them up. + extend_path(RUNTIMES_OUTPUT_RESOURCE_MOD_DIR "${RUNTIMES_OUTPUT_RESOURCE_DIR}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + extend_path(RUNTIMES_INSTALL_RESOURCE_MOD_PATH "${RUNTIMES_INSTALL_RESOURCE_PATH}" "finclude-${CMAKE_Fortran_COMPILER_ID}") + endif () + cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_MOD_DIR) + cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_MOD_PATH) + + # Avoid module files to be installed multiple times if this macro is called multiple times + get_property(is_installed GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED) + if (NOT is_installed) + # No way to find out which mod files built by target individually, so install the entire output directory + # https://stackoverflow.com/questions/52712416/cmake-fortran-module-directory-to-be-used-with-add-library + set(destination "${RUNTIMES_INSTALL_RESOURCE_MOD_PATH}/..") + cmake_path(NORMAL_PATH destination) + install(DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + DESTINATION "${destination}" + ) + set_property(GLOBAL PROPERTY RUNTIMES_MODS_INSTALLED TRUE) + endif () +endmacro () + + +# Set options to compile Fortran module files. +# +# Usage: +# +# flang_module_target(name +# PUBLIC +# Modules files are to be used by other Fortran sources. If a library is +# compiled multiple times (e.g. static/shared, or msvcrt variants), only +# one of those can be public module files; non-public modules are still +# generated but to be forgotten deep inside the build directory to not +# conflict with each other. +# Also, installs the module with the toolchain. 
+# ) +function (flang_module_target tgtname) + set(options PUBLIC) + cmake_parse_arguments(ARG + "${options}" + "" + "" + ${ARGN}) + + if (NOT RUNTIMES_FLANG_MODULES_ENABLED) + return () + endif () + + # Let it find the other public module files + target_compile_options(${tgtname} PRIVATE + "$<$<COMPILE_LANGUAGE:Fortran>:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + ) + + if (ARG_PUBLIC) + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}" + ) + else () + set_target_properties(${tgtname} + PROPERTIES + Fortran_MODULE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${tgtname}.mod" + ) + endif () +endfunction () + + +# We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. foreach(entry ${runtimes}) @@ -363,4 +524,4 @@ if(CMAKE_EXPORT_COMPILE_COMMANDS AND NOT ("${LLVM_BINARY_DIR}" STREQUAL "${CMAKE -o ${LLVM_BINARY_DIR}/compile_commands.json DEPENDS ${CMAKE_BINARY_DIR}/compile_commands.json) add_custom_target(merge_runtime_commands ALL DEPENDS ${LLVM_BINARY_DIR}/compile_commands.json) -endif() +endif() \ No newline at end of file >From a1533ccdd1e0ac3c153a3b8a6007004a0a6cac75 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 15:13:47 +0200 Subject: [PATCH 02/16] python format --- flang/test/lit.cfg.py | 45 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8a375fdf49b8b..b05eba8da0b0c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -128,39 +128,53 @@ config.available_features.add("default_sysroot") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) +flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) if not flang_exe: lit_config.fatal(f"Could not identify flang executable") + def get_resource_module_intrinsic_dir(): # Determine the intrinsic module search path that is added by the driver. 
If # skipping the driver using -fc1, we need to append the path manually. - flang_intrinsics_dir = subprocess.check_output([flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True).strip() + flang_intrinsics_dir = subprocess.check_output( + [flang_exe, "-print-file-name=__fortran_builtins.mod"], text=True + ).strip() if not flang_intrinsics_dir: return None flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) return flang_intrinsics_dir + intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path",flang_intrinsics_dir] + intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: - extra_intrinsics_search_args += ["-fintrinsic-modules-path", config.flang_intrinsic_modules_dir] - lit_config.note(f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}") + extra_intrinsics_search_args += [ + "-fintrinsic-modules-path", + config.flang_intrinsic_modules_dir, + ] + lit_config.note( + f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" + ) -config.substitutions.append(("%intrinsic_module_flags", ' '.join(intrinsics_search_args + extra_intrinsics_search_args))) +config.substitutions.append( + ( + "%intrinsic_module_flags", + " ".join(intrinsics_search_args + extra_intrinsics_search_args), + ) +) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. 
tools = [ ToolSubst( "bbc", - command=FindTool("bbc"), - extra_args=intrinsics_search_args + extra_intrinsics_search_args, - unresolved="fatal", + command=FindTool("bbc"), + extra_args=intrinsics_search_args + extra_intrinsics_search_args, + unresolved="fatal", ), ToolSubst( "%flang", @@ -174,12 +188,11 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly ToolSubst( - "%bbc_bare", - command=FindTool("bbc"), - unresolved="fatal", + "%bbc_bare", + command=FindTool("bbc"), + unresolved="fatal", ), ToolSubst( "%flang_bare", @@ -225,10 +238,10 @@ def get_resource_module_intrinsic_dir(): # If intrinsic modules are not available, disable tests unless they are marked as 'module-independent'. config.available_features.add("module-independent") if config.have_flangrt_mod or config.flang_intrinsic_modules_dir: - config.available_features.add("flangrt-modules") + config.available_features.add("flangrt-modules") else: - lit_config.warning(f"Intrinsic modules not available: disabling most tests") - config.limit_to_features.add("module-independent") + lit_config.warning(f"Intrinsic modules not available: disabling most tests") + config.limit_to_features.add("module-independent") # Determine if OpenMP runtime was built (enable OpenMP tests via REQUIRES in test file) openmp_flags_substitution = "-fopenmp" >From 60ad12fb5bf75fbb33348846bbbd1929eb15ea6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 16:58:49 +0200 Subject: [PATCH 03/16] Minimize changes in .f90 files --- flang-rt/lib/runtime/__fortran_builtins.f90 | 2 +- flang-rt/lib/runtime/__fortran_type_info.f90 | 7 ++----- flang-rt/lib/runtime/cooperative_groups.f90 | 2 +- flang-rt/lib/runtime/ieee_arithmetic.f90 | 3 ++- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang-rt/lib/runtime/__fortran_builtins.f90 
b/flang-rt/lib/runtime/__fortran_builtins.f90 index d5e55b5d95020..7bb078c0b428e 100644 --- a/flang-rt/lib/runtime/__fortran_builtins.f90 +++ b/flang-rt/lib/runtime/__fortran_builtins.f90 @@ -12,7 +12,7 @@ ! from being usable on INTRINSIC statements, and force the program ! to USE the standard intrinsic modules in order to access the ! standard names of the procedures. -module __fortran_builtins +module __fortran_builtins implicit none ! Set PRIVATE by default to explicitly only export what is meant diff --git a/flang-rt/lib/runtime/__fortran_type_info.f90 b/flang-rt/lib/runtime/__fortran_type_info.f90 index 2f936c3787a61..6af2a5a5e30ff 100644 --- a/flang-rt/lib/runtime/__fortran_type_info.f90 +++ b/flang-rt/lib/runtime/__fortran_type_info.f90 @@ -12,11 +12,8 @@ ! in order to generate description tables for all other derived types. module __fortran_type_info -#if 0 - use __fortran_builtins, & -#else - use, intrinsic :: __fortran_builtins, & -#endif + + use, intrinsic :: __fortran_builtins, & only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr implicit none diff --git a/flang-rt/lib/runtime/cooperative_groups.f90 b/flang-rt/lib/runtime/cooperative_groups.f90 index 82d1e0fe84042..fb6d24c8f7bc3 100644 --- a/flang-rt/lib/runtime/cooperative_groups.f90 +++ b/flang-rt/lib/runtime/cooperative_groups.f90 @@ -11,7 +11,7 @@ module cooperative_groups use, intrinsic :: __fortran_builtins, only: c_devptr => __builtin_c_devptr -use :: cudadevice ! implicit dependency, make explicit for CMake +use :: cudadevice ! implicit dependency, made explicit for CMake implicit none diff --git a/flang-rt/lib/runtime/ieee_arithmetic.f90 b/flang-rt/lib/runtime/ieee_arithmetic.f90 index b3288a5cdd69f..bad290ab30097 100644 --- a/flang-rt/lib/runtime/ieee_arithmetic.f90 +++ b/flang-rt/lib/runtime/ieee_arithmetic.f90 @@ -8,7 +8,7 @@ ! 
Fortran 2018 Clause 17 -#include '../include/flang/Runtime/magic-numbers.h' +#include '../../../flang/include/flang/Runtime/magic-numbers.h' module ieee_arithmetic ! F18 Clause 17.1p1: @@ -336,6 +336,7 @@ end subroutine ieee_get_underflow_mode_l##GKIND; public :: ieee_get_underflow_mode #undef IEEE_GET_UNDERFLOW_MODE_L +! Workaround for https://github.com/llvm/llvm-project/issues/139297 ! #define IEEE_IS_FINITE_R(XKIND) elemental logical function ieee_is_finite_a##XKIND(x); real(XKIND), intent(in) :: x; !dir$ ignore_tkr(d) x; end function ieee_is_finite_a##XKIND; interface ieee_is_finite elemental logical function ieee_is_finite_a2(x); real(2), intent(in) :: x; >From e58c524bd588484e99163899241d55be316b719a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 17:05:16 +0200 Subject: [PATCH 04/16] Move CMake 3.20 workaround to where it is needed --- flang-rt/CMakeLists.txt | 34 ---------------------------------- runtimes/CMakeLists.txt | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 09f4f9e7213f1..61fb9e744bce9 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,40 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) - list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 2dcc68b80b07c..a49453a5fcf67 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -108,7 +108,6 @@ endif() # paths that are inside the build directory disables this behavior. # # See https://gitlab.kitware.com/cmake/cmake/-/issues/19227 for further details. -# FIXME: This assumes compiling libc++, but creates problems for every library that uses the C++ standard library function(filter_prefixed list prefix outvar) foreach(str ${list}) @@ -335,6 +334,39 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 7d1a8bee595443112031604bf9e7bf43f3a2635a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 17 Jul 2025 19:39:06 +0200 Subject: [PATCH 05/16] Don't forget to enable Fortran --- flang-rt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 61fb9e744bce9..fd6ad5a170934 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,7 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH 
"${FLANG_RT_SOURCE_DIR}/cmake/modules" >From adc01493eaa6785052dc794348149152c2c92bc3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 00:57:01 +0200 Subject: [PATCH 06/16] enable_fortran test for CMake 3.20 --- flang-rt/CMakeLists.txt | 2 -- llvm/runtimes/CMakeLists.txt | 1 - runtimes/CMakeLists.txt | 5 ++++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index fd6ad5a170934..5ffa1bcc34a41 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -159,8 +159,6 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -include(CheckFortranSourceCompiles) -include(CMakePushCheckState) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 96391ed70133e..2f2f6db16255e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -525,7 +525,6 @@ if(build_runtimes) endforeach() endif() - # TODO: Also enable Fortran for per-target runtimes needed for cross-compilation. if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index a49453a5fcf67..987d173a283be 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -338,7 +338,8 @@ cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) # LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
if (CMAKE_VERSION VERSION_LESS "3.24") cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") @@ -364,6 +365,8 @@ if (CMAKE_VERSION VERSION_LESS "3.24") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + + enable_language(Fortran) endif () endif () >From 498d60cf39c237c654c0cfddbe444d63bc87bc6c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 09:57:18 +0200 Subject: [PATCH 07/16] CI test --- flang-rt/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 5ffa1bcc34a41..d987115c8e9e8 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,6 +23,40 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") +# CMake 3.24 is the first version of CMake that directly recognizes Flang. +# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
+message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH @@ -159,6 +193,7 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) +message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) >From 1bb0144eb21c2b1c8e1e66029d5573d8f5619c4c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 14:24:44 +0200 Subject: [PATCH 08/16] Enforce REAL(16) support --- flang-rt/CMakeLists.txt | 45 ++++------------------- llvm/runtimes/CMakeLists.txt | 8 ++++ runtimes/CMakeLists.txt | 71 ++++++++++++++++++------------------ 3 files changed, 51 insertions(+), 73 deletions(-) diff --git 
a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index d987115c8e9e8..bfbd0af0c31dc 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -23,41 +23,6 @@ set(FLANG_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(FLANG_RT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") set(FLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../flang") -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. -message("CMAKE_Fortran_PREPROCESS_SOURCE1: ${CMAKE_Fortran_PREPROCESS_SOURCE}") -if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - message("CMAKE_Fortran_PREPROCESS_SOURCE2: ${CMAKE_Fortran_PREPROCESS_SOURCE}") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - endif () -endif () -enable_language(Fortran) list(APPEND CMAKE_MODULE_PATH "${FLANG_RT_SOURCE_DIR}/cmake/modules" @@ -79,6 +44,11 @@ include(CMakePushCheckState) # Path to LLVM development tools (FileCheck, llvm-lit, not, ...) 
set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin") +# Fortran compiler not optional for building Flang-RT +enable_language(Fortran) + +flang_module_fortran_enable() + ################# # Build Options # @@ -193,7 +163,9 @@ check_cxx_source_compiles( " HAVE_DECL_STRERROR_S) -message("CMAKE_Fortran_PREPROCESS_SOURCE3: ${CMAKE_Fortran_PREPROCESS_SOURCE}") + +# Look for support of REAL(16), if not already defined via command line. +# NOTE: Does not work with Flang and CMake < 3.24 cmake_push_check_state(RESET) set(CMAKE_REQUIRED_FLAGS "-ffree-form") set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) @@ -206,7 +178,6 @@ check_fortran_source_compiles([[ ) cmake_pop_check_state() -flang_module_fortran_enable() # Search for clang_rt.builtins library. Need in addition to msvcrt. if (WIN32) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 2f2f6db16255e..2bf2b0ee8ed50 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -528,6 +528,14 @@ if(build_runtimes) if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) list(APPEND extra_args ENABLE_FORTRAN) endif() + if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Ensure REAL(16) support in runtimes to be consistent with compiler + if (FLANG_RUNTIME_F128_MATH_LIB OR HAVE_LDBL_MANT_DIG_113) + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=TRUE") + else () + list(APPEND extra_cmake_args "-DFORTRAN_SUPPORTS_REAL16=FALSE") + endif () + endif () if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) foreach(dep opt llvm-link llvm-extract clang clang-offload-packager clang-nvlink-wrapper) if(TARGET ${dep}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 987d173a283be..51214a46f558e 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -94,6 +94,41 @@ include(CheckCXXCompilerFlag) include(ExtendPath) +# CMake 3.24 is the first version of CMake that directly recognizes Flang. 
+# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang, if used. +if (CMAKE_VERSION VERSION_LESS "3.24") + cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) + if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR + _Fortran_COMPILER_STEM STREQUAL "flang") + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") + + set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") + set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + + set(CMAKE_Fortran_SUBMODULE_SEP "-") + set(CMAKE_Fortran_SUBMODULE_EXT ".mod") + + set(CMAKE_Fortran_PREPROCESS_SOURCE + " -cpp -E > ") + + set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") + set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") + + set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") + + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") + set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") + set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") + + set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") + + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") + set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") + endif () +endif () + + # Determine whether we are in the runtimes/runtimes-bins directory of a # bootstrapping build. set(LLVM_TREE_AVAILABLE OFF) @@ -334,42 +369,6 @@ cmake_path(NORMAL_PATH RUNTIMES_OUTPUT_RESOURCE_LIB_DIR) cmake_path(NORMAL_PATH RUNTIMES_INSTALL_RESOURCE_LIB_PATH) -# CMake 3.24 is the first version of CMake that directly recognizes Flang. -# LLVM's requirement is only CMake 3.20, teach CMake 3.20-3.23 how to use Flang. 
-if (CMAKE_VERSION VERSION_LESS "3.24") - cmake_path(GET CMAKE_Fortran_COMPILER STEM _Fortran_COMPILER_STEM) - if (_Fortran_COMPILER_STEM STREQUAL "flang-new" OR - _Fortran_COMPILER_STEM STREQUAL "flang") - include(CMakeForceCompiler) - CMAKE_FORCE_Fortran_COMPILER("${CMAKE_Fortran_COMPILER}" "LLVMFlang") - - set(CMAKE_Fortran_COMPILER_ID "LLVMFlang") - set(CMAKE_Fortran_COMPILER_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") - - set(CMAKE_Fortran_SUBMODULE_SEP "-") - set(CMAKE_Fortran_SUBMODULE_EXT ".mod") - - set(CMAKE_Fortran_PREPROCESS_SOURCE - " -cpp -E > ") - - set(CMAKE_Fortran_FORMAT_FIXED_FLAG "-ffixed-form") - set(CMAKE_Fortran_FORMAT_FREE_FLAG "-ffree-form") - - set(CMAKE_Fortran_MODDIR_FLAG "-module-dir") - - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_ON "-cpp") - set(CMAKE_Fortran_COMPILE_OPTIONS_PREPROCESS_OFF "-nocpp") - set(CMAKE_Fortran_POSTPROCESS_FLAG "-ffixed-line-length-72") - - set(CMAKE_Fortran_COMPILE_OPTIONS_TARGET "--target=") - - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG "-Wl,") - set(CMAKE_Fortran_LINKER_WRAPPER_FLAG_SEP ",") - - enable_language(Fortran) - endif () -endif () - # Enable building Fortran modules # * Set up the Fortran compiler # * Determine files location in the Clang/Flang resource dir >From 8ec6d03939c3a7e2f79bb3ea5911b7607028dd7c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 18 Jul 2025 21:52:02 +0200 Subject: [PATCH 09/16] Also add dependency barrier for WIN32 --- flang-rt/lib/runtime/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 1b8114c102205..20f5d84bb2b69 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -281,14 +281,18 @@ else() function (add_win_flangrt_runtime libtype suffix msvc_lib) set(name "flang_rt.runtime.${suffix}") - add_flangrt_library(${name}.intrinsics OBJECT + add_flangrt_library(${name}.intrinsics.obj OBJECT 
${intrinsics_sources} ) + add_custom_target(${name}.intrinsics + COMMENT "Intrinsic module dependency barrier" + ) + add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) add_flangrt_library(${name} ${libtype} - ${sources} $ + ${sources} $ ${ARGN} - LINK_LIBRARIES ${name}.intrinsics ${Backtrace_LIBRARY} + LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) @@ -311,8 +315,9 @@ else() endif () get_target_property(compile_target ${name}.compile ALIASED_TARGET) - flang_module_target(${name}.intrinsics ${is_public}) + flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) + add_dependencies(${compile_target} ${name}.intrinsics) add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") >From 580a0c56535a9cd1c65fd5a5e6309b73c36ae8b6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:05:34 +0200 Subject: [PATCH 10/16] Dependency barrier test --- flang-rt/lib/runtime/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 20f5d84bb2b69..bdd4318832473 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,6 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics + COMMAND echo "Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) @@ -295,6 +296,7 @@ else() LINK_LIBRARIES ${name}.intrinsics.obj ${Backtrace_LIBRARY} ADDITIONAL_HEADERS ${public_headers} ${private_headers} ) + get_target_property(compile_target ${name}.compile ALIASED_TARGET) if (msvc_lib) set_target_properties(${name} @@ -311,14 +313,13 @@ else() set(is_public "") else () set(is_public PUBLIC) + add_dependencies(flang-rt-mod ${name}.intrinsics 
${compile_target}) set(_has_public_intrinsics "YES" PARENT_SCOPE) endif () - get_target_property(compile_target ${name}.compile ALIASED_TARGET) flang_module_target(${name}.intrinsics.obj ${is_public}) flang_module_target(${compile_target} ${is_public}) add_dependencies(${compile_target} ${name}.intrinsics) - add_dependencies(flang-rt-mod ${name}.intrinsics ${compile_target}) enable_cuda_compilation(${name} "${supported_sources}") enable_omp_offload_compilation(${name} "${supported_sources}") >From 979691a5bba4888be8c7c82d1bed4e8cdc71fff9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 19 Jul 2025 01:06:40 +0200 Subject: [PATCH 11/16] Dependency barrier info --- flang-rt/lib/runtime/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index bdd4318832473..24b84b11f9513 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -285,7 +285,7 @@ else() ${intrinsics_sources} ) add_custom_target(${name}.intrinsics - COMMAND echo "Dependency barrier" + COMMAND echo "${name} Dependency barrier" COMMENT "Intrinsic module dependency barrier" ) add_dependencies(${name}.intrinsics ${name}.intrinsics.obj) >From b4adeab58e615b9059c0a6e5bbcb376d1fe21bb2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 21 Jul 2025 21:56:26 +0200 Subject: [PATCH 12/16] Avoid unrelated changes --- clang/include/clang/Driver/Driver.h | 1 + flang/lib/Optimizer/CodeGen/CodeGen.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 9343fed36b6ac..14e1e644e51aa 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -403,6 +403,7 @@ class Driver { SmallString<128> &CrashDiagDir); public: + /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. 
/// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 688b5aacc4bcd..609ba27bc212b 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1376,10 +1376,10 @@ getTypeDescriptor(ModOpTy mod, mlir::ConversionPatternRewriter &rewriter, return rewriter.create(loc, llvmPtrTy); if (!options.skipExternalRttiDefinition) - fir::emitFatalError( - loc, llvm::Twine("runtime derived type info descriptor of '") + name + - "' was not generated and skipExternalRttiDefinition and " - "ignoreMissingTypeDescriptors options are not set"); + fir::emitFatalError(loc, + "runtime derived type info descriptor was not " + "generated and skipExternalRttiDefinition and " + "ignoreMissingTypeDescriptors options are not set"); // Rtti for a derived type defined in another compilation unit and for which // rtti was not defined in lowering because of the skipExternalRttiDefinition >From c40f43c98ff7ddbcb52c1ac35210320926369b2c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 14:16:19 +0200 Subject: [PATCH 13/16] Use -fintrinsic-modules-path= --- flang/test/lit.cfg.py | 20 +++----------------- runtimes/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index b05eba8da0b0c..bbf9a5a9f277f 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -147,26 +147,18 @@ def get_resource_module_intrinsic_dir(): intrinsics_search_args = [] if flang_intrinsics_dir := get_resource_module_intrinsic_dir(): - intrinsics_search_args += ["-fintrinsic-modules-path", flang_intrinsics_dir] + intrinsics_search_args += [f"-fintrinsic-modules-path={flang_intrinsics_dir}"] lit_config.note(f"using default module intrinsics: {flang_intrinsics_dir}") extra_intrinsics_search_args = [] if config.flang_intrinsic_modules_dir: extra_intrinsics_search_args 
+= [ - "-fintrinsic-modules-path", - config.flang_intrinsic_modules_dir, + f"-fintrinsic-modules-path={config.flang_intrinsic_modules_dir}", ] lit_config.note( f"using extra module intrinsics: {config.flang_intrinsic_modules_dir}" ) -config.substitutions.append( - ( - "%intrinsic_module_flags", - " ".join(intrinsics_search_args + extra_intrinsics_search_args), - ) -) - # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ @@ -193,13 +185,7 @@ def get_resource_module_intrinsic_dir(): "%bbc_bare", command=FindTool("bbc"), unresolved="fatal", - ), - ToolSubst( - "%flang_bare", - command=FindTool("flang"), - extra_args=isysroot_flag, - unresolved="fatal", - ), + ) ] # Flang has several unimplemented features. TODO messages are used to mark diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 51214a46f558e..0832767505fe1 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -454,7 +454,7 @@ function (flang_module_target tgtname) # Let it find the other public module files target_compile_options(${tgtname} PRIVATE - "$<$:SHELL:-fintrinsic-modules-path;SHELL:${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" + "$<$:-fintrinsic-modules-path=${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}>" ) if (ARG_PUBLIC) >From 18422d4d6ea24aeb29a4fece11ba7b509fbd71d6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 16:50:58 +0200 Subject: [PATCH 14/16] Rework intrinsic-module-path.f90 test --- flang/test/Driver/Inputs/ieee_arithmetic.mod | 1 + flang/test/Driver/Inputs/iso_fortran_env.mod | 1 + flang/test/Driver/intrinsic-module-path.f90 | 54 ++++++++++++++++---- flang/test/lit.cfg.py | 2 +- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/flang/test/Driver/Inputs/ieee_arithmetic.mod b/flang/test/Driver/Inputs/ieee_arithmetic.mod index 30fd57801970b..451d1af62a941 100644 --- a/flang/test/Driver/Inputs/ieee_arithmetic.mod +++ b/flang/test/Driver/Inputs/ieee_arithmetic.mod @@ 
-1,5 +1,6 @@ ! DUMMY module ! Added for testing purposes. The contents of this file are currently not relevant. +! Using this file will cause an error because of a missing checksum module ieee_arithmetic type::ieee_round_type integer(1),private::mode=0_1 diff --git a/flang/test/Driver/Inputs/iso_fortran_env.mod b/flang/test/Driver/Inputs/iso_fortran_env.mod index 689297d52027b..ad501c2d9c1b8 100644 --- a/flang/test/Driver/Inputs/iso_fortran_env.mod +++ b/flang/test/Driver/Inputs/iso_fortran_env.mod @@ -1,5 +1,6 @@ ! DUMMY module ! Added for testing purposes. The contents of this file are currently not relevant. +! Using this file will cause an error because of a missing checksum module iso_fortran_env use __fortran_builtins,only:event_type=>__builtin_event_type use __fortran_builtins,only:lock_type=>__builtin_lock_type diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.f90 index 3e696ce5d08e0..3317eb776f0a1 100644 --- a/flang/test/Driver/intrinsic-module-path.f90 +++ b/flang/test/Driver/intrinsic-module-path.f90 @@ -1,23 +1,55 @@ ! Ensure argument -fintrinsic-modules-path works as expected. -! WITHOUT the option, the default location for the module is checked and no error generated. -! With the option GIVEN, the module with the same name is PREPENDED, and considered over the -! default one, causing a CHECKSUM error. !----------------------------------------- -! FRONTEND FLANG DRIVER (flang -fc1) +! FLANG DRIVER !----------------------------------------- -! RUN: %flang_bare -fsyntax-only %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=WITHOUT --allow-empty -! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path %S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN -! RUN: not %flang_bare -fsyntax-only -fintrinsic-modules-path=%S/Inputs/ %s %intrinsic_module_flags 2>&1 | FileCheck %s --check-prefix=GIVEN +! 
NOTE: Depending on how Flang is built, the default intrinsics may have higher +! or lower priority than -fintrinsic-modules-path added here. Using +! basictestmoduleone.mod from Inputs/module-dir/ will trigger an error. -! WITHOUT-NOT: 'ieee_arithmetic.mod' was not found -! WITHOUT-NOT: 'iso_fortran_env.mod' was not found +! RUN: %flang -fsyntax-only -### %s 2>&1 | FileCheck %s --check-prefix=DEFAULTPATH -! GIVEN: error: Cannot use module file for module 'ieee_arithmetic': File has invalid checksum -! GIVEN: error: Cannot use module file for module 'iso_fortran_env': File has invalid checksum +! RUN: %flang -fsyntax-only -DINTRINSICS_DEFAULT %s +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTONE %s 2>&1 | FileCheck %s --check-prefix=NOINPUTONE +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTTWO %s 2>&1 | FileCheck %s --check-prefix=NOINPUTTWO +! RUN: %flang -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: %flang -fsyntax-only -DINTRINSICS_INPUTONE -fintrinsic-modules-path=%S/Inputs/ %s +! RUN: %flang -fsyntax-only -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/ -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: not %flang -fsyntax-only -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir/ -fintrinsic-modules-path=%S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=WRONGINPUTONE +!----------------------------------------- +! FLANG FRONTEND (flang -fc1) +!----------------------------------------- +! NOTE: With %flang_fc1, the default intrinsics path always has higher priority than +! -fintrinsic-modules-path added here. Accidentally using +! ieee_arithmetic/iso_fortran_env from the Inputs/ directory will trigger +! an error (e.g. when the default intrinsics dir is empty). + +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT %s +! 
RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE %s 2>&1 | FileCheck %s --check-prefix=NOINPUTONE +! RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO %s 2>&1 | FileCheck %s --check-prefix=NOINPUTTWO +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir %s +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -fintrinsic-modules-path=%S/Inputs/ %s +! RUN: %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/ -fintrinsic-modules-path=%S/Inputs/module-dir/ %s +! RUN: not %flang_fc1 -fsyntax-only -DINTRINSICS_DEFAULT -DINTRINSICS_INPUTONE -DINTRINSICS_INPUTTWO -fintrinsic-modules-path=%S/Inputs/module-dir -fintrinsic-modules-path=%S/Inputs/ %s 2>&1 | FileCheck %s --check-prefix=WRONGINPUTONE + + +! DEFAULTPATH: flang{{.*}}-fc1{{.*}}-fintrinsic-modules-path + +! NOINPUTONE: Source file 'basictestmoduleone.mod' was not found +! NOINPUTTWO: Source file 'basictestmoduletwo.mod' was not found +! 
WRONGINPUTONE: 't1' not found in module 'basictestmoduleone' + program test_intrinsic_module_path +#ifdef INTRINSICS_DEFAULT use ieee_arithmetic, only: ieee_round_type use iso_fortran_env, only: team_type, event_type, lock_type +#endif +#ifdef INTRINSICS_INPUTONE + use basictestmoduleone, only: t1 +#endif +#ifdef INTRINSICS_INPUTTWO + use basictestmoduletwo, only: t2 +#endif end program diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index bbf9a5a9f277f..ba1e5d55b503c 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -180,7 +180,7 @@ def get_resource_module_intrinsic_dir(): extra_args=["-fc1"] + intrinsics_search_args + extra_intrinsics_search_args, unresolved="fatal", ), - # For not having intrinsic search paths to be added implicitly + # Do not implicitly add intrinsic search paths ToolSubst( "%bbc_bare", command=FindTool("bbc"), >From 9143388ef4c1c2bc9b09595c69ed85eac1daf2c8 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 17:12:30 +0200 Subject: [PATCH 15/16] Remove hint to reduce diff size --- clang/include/clang/Driver/Driver.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 451521eb70760..78a4c5738ae66 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -405,8 +405,6 @@ class Driver { /// Takes the path to a binary that's either in bin/ or lib/ and returns /// the path to clang's resource directory. - /// Do not pass argv[0] as argument, llvm-lit does not adjust argv[0] to the - /// changing cwd. Use llvm::sys::fs::getMainExecutable instead. 
static std::string GetResourcesPath(StringRef BinaryPath); Driver(StringRef ClangExecutable, StringRef TargetTriple, >From 68efc67e99218a9ccc01df0cfd7875ade07815ee Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 22 Jul 2025 19:49:07 +0200 Subject: [PATCH 16/16] Enable preprocessor for test --- .../{intrinsic-module-path.f90 => intrinsic-module-path.F90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename flang/test/Driver/{intrinsic-module-path.f90 => intrinsic-module-path.F90} (100%) diff --git a/flang/test/Driver/intrinsic-module-path.f90 b/flang/test/Driver/intrinsic-module-path.F90 similarity index 100% rename from flang/test/Driver/intrinsic-module-path.f90 rename to flang/test/Driver/intrinsic-module-path.F90 From openmp-commits at lists.llvm.org Tue Jul 22 11:30:17 2025 From: openmp-commits at lists.llvm.org (Anton Korobeynikov via Openmp-commits) Date: Tue, 22 Jul 2025 11:30:17 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687fd8b9.170a0220.11bd60.da93@mx.google.com> https://github.com/asl closed https://github.com/llvm/llvm-project/pull/149987 From openmp-commits at lists.llvm.org Tue Jul 22 11:30:25 2025 From: openmp-commits at lists.llvm.org (Anton Korobeynikov via Openmp-commits) Date: Tue, 22 Jul 2025 11:30:25 -0700 (PDT) Subject: [Openmp-commits] [openmp] Add OpenMP test file for AI review (PR #149987) In-Reply-To: Message-ID: <687fd8c1.170a0220.355cec.e101@mx.google.com> https://github.com/asl locked https://github.com/llvm/llvm-project/pull/149987