[llvm] [offload] Fix typos discovered by codespell (PR #125119)

Christian Clauss via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 13:24:57 PST 2025


https://github.com/cclauss created https://github.com/llvm/llvm-project/pull/125119

https://github.com/codespell-project/codespell

% `codespell --ignore-words-list=archtype,hsa,identty,inout,iself,nd,te,ths,vertexes --write-changes`

>From 6144d2b0d7cbdaef1d153b670b5f8691d7d37624 Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss at me.com>
Date: Thu, 30 Jan 2025 22:17:46 +0100
Subject: [PATCH] [offload] Fix typos discovered by codespell

---
 offload/DeviceRTL/include/Configuration.h     |  2 +-
 offload/DeviceRTL/include/Mapping.h           |  2 +-
 offload/DeviceRTL/include/State.h             |  6 +--
 offload/DeviceRTL/include/Synchronization.h   |  2 +-
 offload/DeviceRTL/src/Configuration.cpp       |  2 +-
 offload/DeviceRTL/src/Misc.cpp                |  2 +-
 offload/DeviceRTL/src/Reduction.cpp           |  4 +-
 offload/DeviceRTL/src/Synchronization.cpp     |  8 ++--
 offload/DeviceRTL/src/Workshare.cpp           |  6 +--
 offload/include/OpenMP/OMPT/Callback.h        |  2 +-
 offload/include/PluginManager.h               |  4 +-
 offload/include/device.h                      |  2 +-
 offload/include/omptarget.h                   |  2 +-
 offload/liboffload/API/APIDefs.td             |  2 +-
 offload/liboffload/API/README.md              |  2 +-
 offload/liboffload/src/OffloadLib.cpp         |  2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    | 48 +++++++++----------
 offload/plugins-nextgen/common/CMakeLists.txt |  2 +-
 .../common/include/GlobalHandler.h            |  4 +-
 offload/plugins-nextgen/common/include/JIT.h  |  2 +-
 .../common/include/PluginInterface.h          | 18 +++----
 offload/plugins-nextgen/common/include/RPC.h  |  4 +-
 .../common/src/GlobalHandler.cpp              |  2 +-
 .../common/src/PluginInterface.cpp            | 14 +++---
 offload/plugins-nextgen/cuda/src/rtl.cpp      | 10 ++--
 .../plugins-nextgen/host/dynamic_ffi/ffi.h    |  2 +-
 offload/src/OpenMP/API.cpp                    |  4 +-
 offload/src/OpenMP/Mapping.cpp                |  4 +-
 offload/src/PluginManager.cpp                 |  4 +-
 offload/src/device.cpp                        |  4 +-
 offload/src/interface.cpp                     |  6 +--
 offload/src/omptarget.cpp                     |  4 +-
 offload/test/api/omp_target_memcpy_async1.c   |  2 +-
 offload/test/mapping/target_uses_allocator.c  |  2 +-
 .../offloading/fortran/dump_map_tables.f90    |  2 +-
 .../fortran/implicit-record-field-mapping.f90 |  2 +-
 .../fortran/local-descriptor-map-regress.f90  |  4 +-
 .../target-map-derived-type-full-1.f90        |  2 +-
 ...arget-map-derived-type-full-implicit-1.f90 |  2 +-
 ...arget-map-derived-type-full-implicit-2.f90 |  2 +-
 .../parallel_target_teams_reduction_max.cpp   |  2 +-
 .../parallel_target_teams_reduction_min.cpp   |  2 +-
 .../struct_mapping_with_pointers.cpp          |  2 +-
 .../unified_shared_memory/associate_ptr.c     |  2 +-
 .../test/unified_shared_memory/close_member.c |  2 +-
 .../kernelreplay/llvm-omp-kernel-replay.cpp   |  2 +-
 46 files changed, 106 insertions(+), 106 deletions(-)

diff --git a/offload/DeviceRTL/include/Configuration.h b/offload/DeviceRTL/include/Configuration.h
index f8b7a6c3c6c9da..95408933dd8653 100644
--- a/offload/DeviceRTL/include/Configuration.h
+++ b/offload/DeviceRTL/include/Configuration.h
@@ -27,7 +27,7 @@ uint32_t getNumDevices();
 /// Return the device number in the system for omp_get_device_num.
 uint32_t getDeviceNum();
 
-/// Return the user choosen debug level.
+/// Return the user chosen debug level.
 uint32_t getDebugKind();
 
 /// Return if teams oversubscription is assumed
diff --git a/offload/DeviceRTL/include/Mapping.h b/offload/DeviceRTL/include/Mapping.h
index 2fb87abe5418c0..2217eb7616b386 100644
--- a/offload/DeviceRTL/include/Mapping.h
+++ b/offload/DeviceRTL/include/Mapping.h
@@ -63,7 +63,7 @@ LaneMaskTy activemask();
 /// Return a mask describing all threads with a smaller Id in the warp.
 LaneMaskTy lanemaskLT();
 
-/// Return a mask describing all threads with a larget Id in the warp.
+/// Return a mask describing all threads with a larger Id in the warp.
 LaneMaskTy lanemaskGT();
 
 /// Return the thread Id in the warp, in [0, getWarpSize()).
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index c487ff29680faa..f0500c1083d7f4 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -33,7 +33,7 @@ namespace memory {
 /// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
 void *allocShared(uint64_t Size, const char *Reason);
 
-/// Free \p Ptr, alloated via allocShared, for \p Reason.
+/// Free \p Ptr, allocated via allocShared, for \p Reason.
 ///
 /// Note: See the restrictions on __kmpc_free_shared for proper usage.
 void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
@@ -44,7 +44,7 @@ void *allocGlobal(uint64_t Size, const char *Reason);
 /// Return a pointer to the dynamic shared memory buffer.
 void *getDynamicBuffer();
 
-/// Free \p Ptr, alloated via allocGlobal, for \p Reason.
+/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
 void freeGlobal(void *Ptr, const char *Reason);
 
 } // namespace memory
@@ -365,7 +365,7 @@ inline state::Value<uint32_t, state::VK_Level> Level;
 /// The `active-level` describes which of the parallel level counted with the
 /// `level-var` is active. There can only be one.
 ///
-/// active-level-var is 1, if ActiveLevelVar is not 0, otherweise it is 0.
+/// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0.
 inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;
 
 /// TODO
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
index 5a789441b9d35f..5045d3c2c99a33 100644
--- a/offload/DeviceRTL/include/Synchronization.h
+++ b/offload/DeviceRTL/include/Synchronization.h
@@ -22,7 +22,7 @@ namespace atomic {
 
 enum OrderingTy {
   relaxed = __ATOMIC_RELAXED,
-  aquire = __ATOMIC_ACQUIRE,
+  acquire = __ATOMIC_ACQUIRE,
   release = __ATOMIC_RELEASE,
   acq_rel = __ATOMIC_ACQ_REL,
   seq_cst = __ATOMIC_SEQ_CST,
diff --git a/offload/DeviceRTL/src/Configuration.cpp b/offload/DeviceRTL/src/Configuration.cpp
index 9e14c203d4a04e..0b488b8034178d 100644
--- a/offload/DeviceRTL/src/Configuration.cpp
+++ b/offload/DeviceRTL/src/Configuration.cpp
@@ -27,7 +27,7 @@ using namespace ompx;
     0;
 [[gnu::weak]] extern const uint32_t __omp_rtl_assume_teams_oversubscription = 0;
 
-// This variable should be visibile to the plugin so we override the default
+// This variable should be visible to the plugin so we override the default
 // hidden visibility.
 [[gnu::used, gnu::retain, gnu::weak,
   gnu::visibility("protected")]] DeviceEnvironmentTy
diff --git a/offload/DeviceRTL/src/Misc.cpp b/offload/DeviceRTL/src/Misc.cpp
index a6660d6853e477..010474b1c4a74d 100644
--- a/offload/DeviceRTL/src/Misc.cpp
+++ b/offload/DeviceRTL/src/Misc.cpp
@@ -33,7 +33,7 @@ double getWTime();
 
 double getWTick() {
   // The number of ticks per second for the AMDGPU clock varies by card and can
-  // only be retrived by querying the driver. We rely on the device environment
+  // only be retrieved by querying the driver. We rely on the device environment
   // to inform us what the proper frequency is.
   return 1.0 / config::getClockFrequency();
 }
diff --git a/offload/DeviceRTL/src/Reduction.cpp b/offload/DeviceRTL/src/Reduction.cpp
index d3b4528401953c..382f6cf392e91a 100644
--- a/offload/DeviceRTL/src/Reduction.cpp
+++ b/offload/DeviceRTL/src/Reduction.cpp
@@ -206,7 +206,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
   // to the number of slots in the buffer.
   bool IsMaster = (ThreadId == 0);
   while (IsMaster) {
-    Bound = atomic::load(&IterCnt, atomic::aquire);
+    Bound = atomic::load(&IterCnt, atomic::acquire);
     if (TeamId < Bound + num_of_records)
       break;
   }
@@ -259,7 +259,7 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
   unsigned NumRecs = kmpcMin(NumTeams, uint32_t(num_of_records));
   if (ChunkTeamCount == NumTeams - Bound - 1) {
     // Ensure we see the global memory writes by other teams
-    fence::kernel(atomic::aquire);
+    fence::kernel(atomic::acquire);
 
     //
     // Last team processing.
diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp
index e0e277928fa910..b09d4801faa012 100644
--- a/offload/DeviceRTL/src/Synchronization.cpp
+++ b/offload/DeviceRTL/src/Synchronization.cpp
@@ -84,7 +84,7 @@ uint32_t atomicInc(uint32_t *A, uint32_t V, atomic::OrderingTy Ordering,
   default:
     __builtin_unreachable();
     Case(atomic::relaxed);
-    Case(atomic::aquire);
+    Case(atomic::acquire);
     Case(atomic::release);
     Case(atomic::acq_rel);
     Case(atomic::seq_cst);
@@ -107,7 +107,7 @@ void namedBarrier() {
   uint32_t WarpSize = mapping::getWarpSize();
   uint32_t NumWaves = NumThreads / WarpSize;
 
-  fence::team(atomic::aquire);
+  fence::team(atomic::acquire);
 
   // named barrier implementation for amdgcn.
   // Uses two 16 bit unsigned counters. One for the number of waves to have
@@ -172,7 +172,7 @@ void syncThreads(atomic::OrderingTy Ordering) {
   __builtin_amdgcn_s_barrier();
 
   if (Ordering != atomic::relaxed)
-    fenceTeam(Ordering == atomic::acq_rel ? atomic::aquire : atomic::seq_cst);
+    fenceTeam(Ordering == atomic::acq_rel ? atomic::acquire : atomic::seq_cst);
 }
 void syncThreadsAligned(atomic::OrderingTy Ordering) { syncThreads(Ordering); }
 
@@ -198,7 +198,7 @@ void setCriticalLock(omp_lock_t *Lock) {
         !cas((uint32_t *)Lock, UNSET, SET, atomic::relaxed, atomic::relaxed)) {
       __builtin_amdgcn_s_sleep(32);
     }
-    fenceKernel(atomic::aquire);
+    fenceKernel(atomic::acquire);
   }
 }
 
diff --git a/offload/DeviceRTL/src/Workshare.cpp b/offload/DeviceRTL/src/Workshare.cpp
index ad60e66548be90..cb83f1b670c9ee 100644
--- a/offload/DeviceRTL/src/Workshare.cpp
+++ b/offload/DeviceRTL/src/Workshare.cpp
@@ -79,7 +79,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
     lb = lb + entityId * chunk;
     T inputUb = ub;
     ub = lb + chunk - 1; // Clang uses i <= ub
-    // Say ub' is the begining of the last chunk. Then who ever has a
+    // Say ub' is the beginning of the last chunk. Then whoever has a
     // lower bound plus a multiple of the increment equal to ub' is
     // the last one.
     T beginingLastChunk = inputUb - (inputUb % chunk);
@@ -806,7 +806,7 @@ template <typename Ty> class StaticLoopChunker {
                                 NumIters, OneIterationPerThread);
   }
 
-  /// Worksharing `distrbute`-loop.
+  /// Worksharing `distribute`-loop.
   static void Distribute(IdentTy *Loc, void (*LoopBody)(Ty, void *), void *Arg,
                          Ty NumIters, Ty BlockChunk) {
     ASSERT(icv::Level == 0, "Bad distribute");
@@ -853,7 +853,7 @@ template <typename Ty> class StaticLoopChunker {
     ASSERT(state::ParallelTeamSize == 1, "Bad distribute");
   }
 
-  /// Worksharing `distrbute parallel for`-loop.
+  /// Worksharing `distribute parallel for`-loop.
   static void DistributeFor(IdentTy *Loc, void (*LoopBody)(Ty, void *),
                             void *Arg, Ty NumIters, Ty NumThreads,
                             Ty BlockChunk, Ty ThreadChunk) {
diff --git a/offload/include/OpenMP/OMPT/Callback.h b/offload/include/OpenMP/OMPT/Callback.h
index 68cb43745eb1f8..9d545c643223f5 100644
--- a/offload/include/OpenMP/OMPT/Callback.h
+++ b/offload/include/OpenMP/OMPT/Callback.h
@@ -56,7 +56,7 @@ FOREACH_OMPT_EMI_EVENT(declareOmptCallback)
 
 /// This function will call an OpenMP API function. Which in turn will lookup a
 /// given enum value of type \p ompt_callbacks_t and copy the address of the
-/// corresponding callback funtion into the provided pointer.
+/// corresponding callback function into the provided pointer.
 /// The pointer to the runtime function is passed during 'initializeLibrary'.
 /// \p which the enum value of the requested callback function
 /// \p callback the destination pointer where the address shall be copied
diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h
index f4febac69c45eb..ec3adadf0819b0 100644
--- a/offload/include/PluginManager.h
+++ b/offload/include/PluginManager.h
@@ -77,7 +77,7 @@ struct PluginManager {
   /// Iterate over all device images registered with this plugin.
   auto deviceImages() { return llvm::make_pointee_range(DeviceImages); }
 
-  /// Translation table retreived from the binary
+  /// Translation table retrieved from the binary
   HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
   std::mutex TrlTblMtx; ///< For Translation Table
   /// Host offload entries in order of image registration
@@ -171,7 +171,7 @@ struct PluginManager {
   /// Devices associated with plugins, accesses to the container are exclusive.
   ProtectedObj<DeviceContainerTy> Devices;
 
-  /// References to upgraded legacy offloading entires.
+  /// References to upgraded legacy offloading entries.
   std::list<llvm::SmallVector<llvm::offloading::EntryTy, 0>> LegacyEntries;
   std::list<llvm::SmallVector<__tgt_device_image, 0>> LegacyImages;
   llvm::DenseMap<__tgt_bin_desc *, __tgt_bin_desc> UpgradedDescriptors;
diff --git a/offload/include/device.h b/offload/include/device.h
index 3132d35b7b38c8..f4b10abbaa3fda 100644
--- a/offload/include/device.h
+++ b/offload/include/device.h
@@ -134,7 +134,7 @@ struct DeviceTy {
   int32_t recordEvent(void *Event, AsyncInfoTy &AsyncInfo);
 
   /// Wait for an event. This function can be blocking or non-blocking,
-  /// depending on the implmentation. It is expected to set a dependence on the
+  /// depending on the implementation. It is expected to set a dependence on the
   /// event such that corresponding operations shall only start once the event
   /// is fulfilled.
   int32_t waitEvent(void *Event, AsyncInfoTy &AsyncInfo);
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 2b6445e9fbe550..6971780c7bdb5a 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -42,7 +42,7 @@ enum __tgt_target_return_t : int {
   OMP_TGT_SUCCESS = 0,
   /// offload may not execute on the requested target device
   /// this scenario can be caused by the device not available or unsupported
-  /// as described in the Execution Model in the specifcation
+  /// as described in the Execution Model in the specification
   /// this status may not be used for target device execution failure
   /// which should be handled internally in libomptarget
   OMP_TGT_FAIL = ~0
diff --git a/offload/liboffload/API/APIDefs.td b/offload/liboffload/API/APIDefs.td
index 60c1b85d26911e..cee4adea1d9f64 100644
--- a/offload/liboffload/API/APIDefs.td
+++ b/offload/liboffload/API/APIDefs.td
@@ -36,7 +36,7 @@ class IsPointerType<string Type> {
   bit ret = !ne(!find(Type, "*", !sub(!size(Type), 1)), -1);
 }
 
-// Describes the valid range of a pointer parameter that reperesents an array
+// Describes the valid range of a pointer parameter that represents an array
 class Range<string Begin, string End> {
   string begin = Begin;
   string end = End;
diff --git a/offload/liboffload/API/README.md b/offload/liboffload/API/README.md
index 38a055811b2d00..b59ac2782a2be5 100644
--- a/offload/liboffload/API/README.md
+++ b/offload/liboffload/API/README.md
@@ -62,7 +62,7 @@ which preserves ABI compatibility with C.
 Represents a C-style enum. Contains a list of `etor` values, which have a name
 and description.
 
-A `TaggedEtor` record type also exists which addtionally takes a type. This type
+A `TaggedEtor` record type also exists which additionally takes a type. This type
 is used when the enum is used as a parameter to a function with a type-tagged
 function parameter (e.g. `olGetDeviceInfo`).
 
diff --git a/offload/liboffload/src/OffloadLib.cpp b/offload/liboffload/src/OffloadLib.cpp
index 37876713212c98..70e1ce1f84d831 100644
--- a/offload/liboffload/src/OffloadLib.cpp
+++ b/offload/liboffload/src/OffloadLib.cpp
@@ -36,7 +36,7 @@ OffloadConfig &offloadConfig() {
   return Config;
 }
 
-// Pull in the declarations for the implementation funtions. The actual entry
+// Pull in the declarations for the implementation functions. The actual entry
 // points in this file wrap these.
 #include "OffloadImplFuncDecls.inc"
 
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 6fc75ac1542894..92184ba796dbd8 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -436,7 +436,7 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
   /// have more previously allocated buffers.
   void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) override;
 
-  /// Deallocation callack that will be called by the memory manager.
+  /// Deallocation callback that will be called by the memory manager.
   int free(void *TgtPtr, TargetAllocTy Kind) override {
     if (auto Err = MemoryPool->deallocate(TgtPtr)) {
       consumeError(std::move(Err));
@@ -493,7 +493,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
   }
 
 private:
-  /// The exectuable loaded on the agent.
+  /// The executable loaded on the agent.
   hsa_executable_t Executable;
   StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
   uint16_t ELFABIVersion;
@@ -876,7 +876,7 @@ struct AMDGPUQueueTy {
     hsa_signal_store_relaxed(Queue->doorbell_signal, PacketId);
   }
 
-  /// Callack that will be called when an error is detected on the HSA queue.
+  /// Callback that will be called when an error is detected on the HSA queue.
   static void callbackError(hsa_status_t Status, hsa_queue_t *Source,
                             void *Data);
 
@@ -932,7 +932,7 @@ struct AMDGPUStreamTy {
   /// operation's output signal is set to the consumed slot's signal. If there
   /// is a previous asynchronous operation on the previous slot, the HSA async
   /// operation's input signal is set to the signal of the previous slot. This
-  /// way, we obtain a chain of dependant async operations. The action is a
+  /// way, we obtain a chain of dependent async operations. The action is a
   /// function that will be executed eventually after the operation is
   /// completed, e.g., for releasing a buffer.
   struct StreamSlotTy {
@@ -1055,10 +1055,10 @@ struct AMDGPUStreamTy {
   /// Timeout hint for HSA actively waiting for signal value to change
   const uint64_t StreamBusyWaitMicroseconds;
 
-  /// Indicate to spread data transfers across all avilable SDMAs
+  /// Indicate to spread data transfers across all available SDMAs
   bool UseMultipleSdmaEngines;
 
-  /// Return the current number of asychronous operations on the stream.
+  /// Return the current number of asynchronous operations on the stream.
   uint32_t size() const { return NextSlot; }
 
   /// Return the last valid slot on the stream.
@@ -1155,12 +1155,12 @@ struct AMDGPUStreamTy {
     // changes on the slot.
     std::atomic_thread_fence(std::memory_order_acquire);
 
-    // Peform the operation.
+    // Perform the operation.
     if (auto Err = Slot->performAction())
-      FATAL_MESSAGE(1, "Error peforming post action: %s",
+      FATAL_MESSAGE(1, "Error performing post action: %s",
                     toString(std::move(Err)).data());
 
-    // Signal the output signal to notify the asycnhronous operation finalized.
+    // Signal the output signal to notify the asynchronous operation finalized.
     Slot->Signal->signal();
 
     // Unregister callback.
@@ -1183,9 +1183,9 @@ struct AMDGPUStreamTy {
   /// action. There are two kinds of memory buffers:
   ///   1. For kernel arguments. This buffer can be freed after receiving the
   ///   kernel completion signal.
-  ///   2. For H2D tranfers that need pinned memory space for staging. This
+  ///   2. For H2D transfers that need pinned memory space for staging. This
   ///   buffer can be freed after receiving the transfer completion signal.
-  ///   3. For D2H tranfers that need pinned memory space for staging. This
+  ///   3. For D2H transfers that need pinned memory space for staging. This
   ///   buffer cannot be freed after receiving the transfer completion signal
   ///   because of the following asynchronous H2H callback.
   ///      For this reason, This action can only be taken at
@@ -1222,7 +1222,7 @@ struct AMDGPUStreamTy {
   /// Create an empty stream associated with a specific device.
   AMDGPUStreamTy(AMDGPUDeviceTy &Device);
 
-  /// Intialize the stream's signals.
+  /// Initialize the stream's signals.
   Error init() { return Plugin::success(); }
 
   /// Deinitialize the stream's signals.
@@ -1312,7 +1312,7 @@ struct AMDGPUStreamTy {
   /// Push an asynchronous memory copy device-to-host involving an unpinned
   /// memory buffer. The operation consists of a two-step copy from the
   /// device buffer to an intermediate pinned host buffer, and then, to a
-  /// unpinned host buffer. Both operations are asynchronous and dependant.
+  /// unpinned host buffer. Both operations are asynchronous and dependent.
   /// The intermediate pinned buffer will be released to the specified memory
   /// manager once the operation completes.
   Error pushMemoryCopyD2HAsync(void *Dst, const void *Src, void *Inter,
@@ -1374,7 +1374,7 @@ struct AMDGPUStreamTy {
   /// Push an asynchronous memory copy host-to-device involving an unpinned
   /// memory buffer. The operation consists of a two-step copy from the
   /// unpinned host buffer to an intermediate pinned host buffer, and then, to
-  /// the pinned host buffer. Both operations are asynchronous and dependant.
+  /// the pinned host buffer. Both operations are asynchronous and dependent.
   /// The intermediate pinned buffer will be released to the specified memory
   /// manager once the operation completes.
   Error pushMemoryCopyH2DAsync(void *Dst, const void *Src, void *Inter,
@@ -1672,7 +1672,7 @@ struct AMDGPUStreamManagerTy final
   }
 
 private:
-  /// Search for and assign an prefereably idle queue to the given Stream. If
+  /// Search for and assign a preferably idle queue to the given Stream. If
   /// there is no queue without current users, choose the queue with the lowest
   /// user count. If utilization is ignored: use round robin selection.
   inline Error assignNextQueue(AMDGPUStreamTy *Stream) {
@@ -1856,13 +1856,13 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
   /// Get a memory pool for fine-grained allocations.
   AMDGPUMemoryPoolTy &getFineGrainedMemoryPool() {
     assert(!FineGrainedMemoryPools.empty() && "No fine-grained mempool");
-    // Retrive any memory pool.
+    // Retrieve any memory pool.
     return *FineGrainedMemoryPools[0];
   }
 
   AMDGPUMemoryPoolTy &getCoarseGrainedMemoryPool() {
     assert(!CoarseGrainedMemoryPools.empty() && "No coarse-grained mempool");
-    // Retrive any memory pool.
+    // Retrieve any memory pool.
     return *CoarseGrainedMemoryPools[0];
   }
 
@@ -1937,7 +1937,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
                          ClockFrequency) != HSA_STATUS_SUCCESS)
       ClockFrequency = 0;
 
-    // Load the grid values dependending on the wavefront.
+    // Load the grid values depending on the wavefront.
     if (WavefrontSize == 32)
       GridValues = getAMDGPUGridValues<32>();
     else if (WavefrontSize == 64)
@@ -2097,7 +2097,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
     std::string LLDPath = ErrorOrPath.get();
     INFO(OMP_INFOTYPE_PLUGIN_KERNEL, getDeviceId(),
-         "Using `%s` to link JITed amdgcn ouput.", LLDPath.c_str());
+         "Using `%s` to link JITed amdgcn output.", LLDPath.c_str());
 
     std::string MCPU = "-plugin-opt=mcpu=" + getComputeUnitKind();
     StringRef Args[] = {LLDPath,
@@ -2158,15 +2158,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   }
 
   /// We want to set up the RPC server for host services to the GPU if it is
-  /// availible.
+  /// available.
   bool shouldSetupRPCServer() const override { return true; }
 
-  /// The RPC interface should have enough space for all availible parallelism.
+  /// The RPC interface should have enough space for all available parallelism.
   uint64_t requestedRPCPortCount() const override {
     return getHardwareParallelism();
   }
 
-  /// Get the stream of the asynchronous info sructure or get a new one.
+  /// Get the stream of the asynchronous info structure or get a new one.
   Error getStream(AsyncInfoWrapperTy &AsyncInfoWrapper,
                   AMDGPUStreamTy *&Stream) {
     // Get the stream (if any) from the async info.
@@ -2716,7 +2716,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       Status =
           Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
       if (Status == HSA_STATUS_SUCCESS)
-        Info.add<InfoLevel3>("Accessable by all", TmpBool);
+        Info.add<InfoLevel3>("Accessible by all", TmpBool);
     }
 
     Info.add("ISAs");
@@ -2895,7 +2895,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
   /// Envar specifying the maximum size in bytes where the memory copies are
   /// asynchronous operations. Up to this transfer size, the memory copies are
-  /// asychronous operations pushed to the corresponding stream. For larger
+  /// asynchronous operations pushed to the corresponding stream. For larger
   /// transfers, they are synchronous transfers.
   UInt32Envar OMPX_MaxAsyncCopyBytes;
 
diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt
index 14c48f6ace9710..de219efc8f79cc 100644
--- a/offload/plugins-nextgen/common/CMakeLists.txt
+++ b/offload/plugins-nextgen/common/CMakeLists.txt
@@ -21,7 +21,7 @@ if (NOT LLVM_LINK_LLVM_DYLIB)
   endforeach()
 endif()
 
-# Include the RPC server from the `libc` project if availible.
+# Include the RPC server from the `libc` project if available.
 include(FindLibcCommonUtils)
 target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities)
 if(TARGET llvmlibc_rpc_server AND ${LIBOMPTARGET_GPU_LIBC_SUPPORT})
diff --git a/offload/plugins-nextgen/common/include/GlobalHandler.h b/offload/plugins-nextgen/common/include/GlobalHandler.h
index d2914e7cd0eb4f..bb0e907ecc294a 100644
--- a/offload/plugins-nextgen/common/include/GlobalHandler.h
+++ b/offload/plugins-nextgen/common/include/GlobalHandler.h
@@ -1,4 +1,4 @@
-//===- GlobalHandler.h - Target independent global & enviroment handling --===//
+//===- GlobalHandler.h - Target independent global & environment handling --===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -100,7 +100,7 @@ template <typename Ty> class StaticGlobalTy : public GlobalTy {
 
 /// Helper class to do the heavy lifting when it comes to moving globals between
 /// host and device. Through the GenericDeviceTy we access memcpy DtoH and HtoD,
-/// which means the only things specialized by the subclass is the retrival of
+/// which means the only thing specialized by the subclass is the retrieval of
 /// global metadata (size, addr) from the device.
 /// \see getGlobalMetadataFromDevice
 class GenericGlobalHandlerTy {
diff --git a/offload/plugins-nextgen/common/include/JIT.h b/offload/plugins-nextgen/common/include/JIT.h
index 4414926a6178f2..8c530436a754b2 100644
--- a/offload/plugins-nextgen/common/include/JIT.h
+++ b/offload/plugins-nextgen/common/include/JIT.h
@@ -59,7 +59,7 @@ struct JITEngine {
   /// Compile the bitcode image \p Image and generate the binary image that can
   /// be loaded to the target device of the triple \p Triple architecture \p
   /// MCpu. \p PostProcessing will be called after codegen to handle cases such
-  /// as assember as an external tool.
+  /// as assembler as an external tool.
   Expected<const __tgt_device_image *>
   compile(const __tgt_device_image &Image, const std::string &ComputeUnitKind,
           PostProcessingFn PostProcessing);
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index eb266e8d4d451a..a30589e039468d 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -92,14 +92,14 @@ struct AsyncInfoWrapperTy {
   }
 
   /// Synchronize with the __tgt_async_info's pending operations if it's the
-  /// internal async info. The error associated to the aysnchronous operations
+  /// internal async info. The error associated to the asynchronous operations
   /// issued in this queue must be provided in \p Err. This function will update
   /// the error parameter with the result of the synchronization if it was
   /// actually executed. This function must be called before destroying the
   /// object and only once.
   void finalize(Error &Err);
 
-  /// Register \p Ptr as an associated alloction that is freed after
+  /// Register \p Ptr as an associated allocation that is freed after
   /// finalization.
   void freeAllocationAfterSynchronization(void *Ptr) {
     AsyncInfoPtr->AssociatedAllocations.push_back(Ptr);
@@ -456,7 +456,7 @@ struct KernelTraceInfoRecordTy {
 };
 
 /// Class representing a map of host pinned allocations. We track these pinned
-/// allocations, so memory tranfers invloving these buffers can be optimized.
+/// allocations, so memory transfers involving these buffers can be optimized.
 class PinnedAllocationMapTy {
 
   /// Struct representing a map entry.
@@ -482,7 +482,7 @@ class PinnedAllocationMapTy {
     /// becomes zero.
     mutable size_t References;
 
-    /// Create an entry with the host and device acessible pointers, the buffer
+    /// Create an entry with the host and device accessible pointers, the buffer
     /// size, and a boolean indicating whether the buffer was locked externally.
     EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size,
             bool ExternallyLocked)
@@ -517,7 +517,7 @@ class PinnedAllocationMapTy {
   /// Indicate whether mapped host buffers should be locked automatically.
   bool LockMappedBuffers;
 
-  /// Indicate whether failures when locking mapped buffers should be ingored.
+  /// Indicate whether failures when locking mapped buffers should be ignored.
   bool IgnoreLockMappedFailures;
 
   /// Find an allocation that intersects with \p HstPtr pointer. Assume the
@@ -1122,7 +1122,7 @@ struct GenericPluginTy {
   /// Get the reference to the device with a certain device id.
   GenericDeviceTy &getDevice(int32_t DeviceId) {
     assert(isValidDeviceId(DeviceId) && "Invalid device id");
-    assert(Devices[DeviceId] && "Device is unitialized");
+    assert(Devices[DeviceId] && "Device is uninitialized");
 
     return *Devices[DeviceId];
   }
@@ -1270,7 +1270,7 @@ struct GenericPluginTy {
   int32_t data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
                         int64_t Size);
 
-  /// Copy data from the given device asynchornously.
+  /// Copy data from the given device asynchronously.
   int32_t data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr,
                               int64_t Size, __tgt_async_info *AsyncInfoPtr);
 
@@ -1308,7 +1308,7 @@ struct GenericPluginTy {
   int32_t wait_event(int32_t DeviceId, void *EventPtr,
                      __tgt_async_info *AsyncInfoPtr);
 
-  /// Syncrhonize execution until an event is done.
+  /// Synchronize execution until an event is done.
   int32_t sync_event(int32_t DeviceId, void *EventPtr);
 
   /// Remove the event from the plugin.
@@ -1327,7 +1327,7 @@ struct GenericPluginTy {
   /// Sets the offset into the devices for use by OMPT.
   int32_t set_device_identifier(int32_t UserId, int32_t DeviceId);
 
-  /// Returns if the plugin can support auotmatic copy.
+  /// Returns if the plugin can support automatic copy.
   int32_t use_auto_zero_copy(int32_t DeviceId);
 
   /// Look up a global symbol in the given binary.
diff --git a/offload/plugins-nextgen/common/include/RPC.h b/offload/plugins-nextgen/common/include/RPC.h
index 42fca4aa4aebcc..d750ce30e74b05 100644
--- a/offload/plugins-nextgen/common/include/RPC.h
+++ b/offload/plugins-nextgen/common/include/RPC.h
@@ -34,7 +34,7 @@ class DeviceImageTy;
 } // namespace plugin
 
 /// A generic class implementing the interface between the RPC server provided
-/// by the 'libc' project and 'libomptarget'. If the RPC server is not availible
+/// by the 'libc' project and 'libomptarget'. If the RPC server is not available
 /// these routines will perform no action.
 struct RPCServerTy {
 public:
@@ -48,7 +48,7 @@ struct RPCServerTy {
   llvm::Error startThread();
 
   /// Check if this device image is using an RPC server. This checks for the
-  /// precense of an externally visible symbol in the device image that will
+  /// presence of an externally visible symbol in the device image that will
   /// be present whenever RPC code is called.
   llvm::Expected<bool> isDeviceUsingRPC(plugin::GenericDeviceTy &Device,
                                         plugin::GenericGlobalHandlerTy &Handler,
diff --git a/offload/plugins-nextgen/common/src/GlobalHandler.cpp b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
index 8ad7f15990ac6a..8854fc52205a77 100644
--- a/offload/plugins-nextgen/common/src/GlobalHandler.cpp
+++ b/offload/plugins-nextgen/common/src/GlobalHandler.cpp
@@ -67,7 +67,7 @@ Error GenericGlobalHandlerTy::moveGlobalBetweenDeviceAndHost(
       return Err;
   }
 
-  DP("Succesfully %s %u bytes associated with global symbol '%s' %s the "
+  DP("Successfully %s %u bytes associated with global symbol '%s' %s the "
      "device "
      "(%p -> %p).\n",
      Device2Host ? "read" : "write", HostGlobal.getSize(),
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 16f510de3ecc5c..427cd38e4ba05e 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -738,7 +738,7 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
       OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
       // Do not initialize the following two envars since they depend on the
       // device initialization. These cannot be consulted until the device is
-      // initialized correctly. We intialize them in GenericDeviceTy::init().
+      // initialized correctly. We initialize them in GenericDeviceTy::init().
       OMPX_TargetStackSize(), OMPX_TargetHeapSize(),
       // By default, the initial number of streams and events is 1.
       OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 1),
@@ -1040,7 +1040,7 @@ Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
 
 Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
                                       DeviceImageTy &Image) {
-  // The plugin either does not need an RPC server or it is unavailible.
+  // The plugin either does not need an RPC server or it is unavailable.
   if (!shouldSetupRPCServer())
     return Plugin::success();
 
@@ -1325,16 +1325,16 @@ Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
 }
 
 Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
-  return Plugin::error("Device does not suppport VA Management");
+  return Plugin::error("Device does not support VA Management");
 }
 
 Error GenericDeviceTy::memoryVAUnMap(void *VAddr, size_t Size) {
-  return Plugin::error("Device does not suppport VA Management");
+  return Plugin::error("Device does not support VA Management");
 }
 
 Error GenericDeviceTy::getDeviceMemorySize(uint64_t &DSize) {
   return Plugin::error(
-      "Mising getDeviceMemorySize impelmentation (required by RR-heuristic");
+      "Missing getDeviceMemorySize implementation (required by RR-heuristic");
 }
 
 Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
@@ -1814,7 +1814,7 @@ int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId,
 
   if (auto Err = RecordReplay->init(&Device, MemorySize, VAddr, Status,
                                     SaveOutput, ReqPtrArgOffset)) {
-    REPORT("WARNING RR did not intialize RR-properly with %lu bytes"
+    REPORT("WARNING RR did not initialize RR-properly with %lu bytes"
            "(Error: %s)\n",
            MemorySize, toString(std::move(Err)).data());
     RecordReplay->setStatus(RecordReplayTy::RRStatusTy::RRDeactivated);
@@ -1961,7 +1961,7 @@ int32_t GenericPluginTy::data_retrieve_async(int32_t DeviceId, void *HstPtr,
   auto Err =
       getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, Size, AsyncInfoPtr);
   if (Err) {
-    REPORT("Faliure to copy data from device to host. Pointers: host "
+    REPORT("Failure to copy data from device to host. Pointers: host "
            "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
            DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
            toString(std::move(Err)).data());
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 52e8a100dc87b9..0d0c4858aa7fa0 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -395,7 +395,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
 
   virtual Error callGlobalConstructors(GenericPluginTy &Plugin,
                                        DeviceImageTy &Image) override {
-    // Check for the presense of global destructors at initialization time. This
+    // Check for the presence of global destructors at initialization time. This
     // is required when the image may be deallocated before destructors are run.
     GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
     if (Handler.isSymbolInImage(*this, Image, "nvptx$device$fini"))
@@ -495,15 +495,15 @@ struct CUDADeviceTy : public GenericDeviceTy {
   }
 
   /// We want to set up the RPC server for host services to the GPU if it is
-  /// availible.
+  /// available.
   bool shouldSetupRPCServer() const override { return true; }
 
-  /// The RPC interface should have enough space for all availible parallelism.
+  /// The RPC interface should have enough space for all available parallelism.
   uint64_t requestedRPCPortCount() const override {
     return getHardwareParallelism();
   }
 
-  /// Get the stream of the asynchronous info sructure or get a new one.
+  /// Get the stream of the asynchronous info structure or get a new one.
   Error getStream(AsyncInfoWrapperTy &AsyncInfoWrapper, CUstream &Stream) {
     // Get the stream (if any) from the async info.
     Stream = AsyncInfoWrapper.getQueueAs<CUstream>();
@@ -675,7 +675,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
     if (Size >= Free) {
       *Addr = nullptr;
       return Plugin::error(
-          "Canot map memory size larger than the available device memory");
+          "Cannot map memory size larger than the available device memory");
     }
 
     // currently NVidia only supports pinned device types
diff --git a/offload/plugins-nextgen/host/dynamic_ffi/ffi.h b/offload/plugins-nextgen/host/dynamic_ffi/ffi.h
index 4a9e88cc4dc9cc..9fd3c43b75fad7 100644
--- a/offload/plugins-nextgen/host/dynamic_ffi/ffi.h
+++ b/offload/plugins-nextgen/host/dynamic_ffi/ffi.h
@@ -38,7 +38,7 @@ typedef enum {
   FFI_BAD_ARGTYPE
 } ffi_status;
 
-// These are target depenent so we set them manually for each ABI by referencing
+// These are target dependent so we set them manually for each ABI by referencing
 // the FFI source.
 typedef enum ffi_abi {
 #if (defined(_M_X64) || defined(__x86_64__))
diff --git a/offload/src/OpenMP/API.cpp b/offload/src/OpenMP/API.cpp
index e59bdba8abf0e4..62e27060b710d6 100644
--- a/offload/src/OpenMP/API.cpp
+++ b/offload/src/OpenMP/API.cpp
@@ -185,7 +185,7 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
   // omp_target_is_present tests whether a host pointer refers to storage that
   // is mapped to a given device. However, due to the lack of the storage size,
   // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
-  // lengh array) is mapped instead of the referred storage.
+  // length array) is mapped instead of the referred storage.
   TargetPointerResultTy TPR =
       DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
                                                    /*UpdateRefCount=*/false,
@@ -256,7 +256,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
       FATAL_MESSAGE(DstDevice, "%s",
                     toString(DstDeviceOrErr.takeError()).c_str());
     // First try to use D2D memcpy which is more efficient. If fails, fall back
-    // to unefficient way.
+    // to inefficient way.
     if (SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) {
       AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
       Rc = SrcDeviceOrErr->dataExchange(SrcAddr, *DstDeviceOrErr, DstAddr,
diff --git a/offload/src/OpenMP/Mapping.cpp b/offload/src/OpenMP/Mapping.cpp
index 595e3456ab54c0..4b78ed3360a266 100644
--- a/offload/src/OpenMP/Mapping.cpp
+++ b/offload/src/OpenMP/Mapping.cpp
@@ -149,7 +149,7 @@ LookupResult MappingInfoTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
     //   std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin
     if (Upper != HDTTMap->begin()) {
       LR.TPR.setEntry(std::prev(Upper)->HDTT, OwnedTPR);
-      // the left side of extended address range is satisified.
+      // the left side of extended address range is satisfied.
       // hp >= LR.TPR.getEntry()->HstPtrBegin || hp >=
       // LR.TPR.getEntry()->HstPtrBase
       LR.Flags.IsContained = HP < LR.TPR.getEntry()->HstPtrEnd ||
@@ -158,7 +158,7 @@ LookupResult MappingInfoTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
 
     if (!LR.Flags.IsContained && Upper != HDTTMap->end()) {
       LR.TPR.setEntry(Upper->HDTT, OwnedTPR);
-      // the right side of extended address range is satisified.
+      // the right side of extended address range is satisfied.
       // hp < LR.TPR.getEntry()->HstPtrEnd || hp < LR.TPR.getEntry()->HstPtrBase
       LR.Flags.IsContained = HP >= LR.TPR.getEntry()->HstPtrBase;
     }
diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp
index b389d2ddc206ff..f827d0ba4e99f2 100644
--- a/offload/src/PluginManager.cpp
+++ b/offload/src/PluginManager.cpp
@@ -189,7 +189,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
     if (Entry.Flags == OMP_REGISTER_REQUIRES)
       PM->addRequirements(Entry.Data);
 
-  // Extract the exectuable image and extra information if availible.
+  // Extract the executable image and extra information if available.
   for (int32_t i = 0; i < Desc->NumDeviceImages; ++i)
     PM->addDeviceImage(*Desc, Desc->DeviceImages[i]);
 
@@ -273,7 +273,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
 
   // Auto Zero-Copy can only be currently triggered when the system is an
   // homogeneous APU architecture without attached discrete GPUs.
-  // If all devices suggest to use it, change requirment flags to trigger
+  // If all devices suggest to use it, change requirement flags to trigger
   // zero-copy behavior when mapping memory.
   if (UseAutoZeroCopy)
     addRequirements(OMPX_REQ_AUTO_ZERO_COPY);
diff --git a/offload/src/device.cpp b/offload/src/device.cpp
index 943c7782787306..2beb4093572da7 100644
--- a/offload/src/device.cpp
+++ b/offload/src/device.cpp
@@ -196,7 +196,7 @@ int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
      DPxPTR(HstPtr), Size);
 
   if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) {
-    REPORT("Notifiying about data mapping failed.\n");
+    REPORT("Notifying about data mapping failed.\n");
     return OFFLOAD_FAIL;
   }
   return OFFLOAD_SUCCESS;
@@ -206,7 +206,7 @@ int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
   DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr));
 
   if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) {
-    REPORT("Notifiying about data unmapping failed.\n");
+    REPORT("Notifying about data unmapping failed.\n");
     return OFFLOAD_FAIL;
   }
   return OFFLOAD_SUCCESS;
diff --git a/offload/src/interface.cpp b/offload/src/interface.cpp
index ad84a43cef8af4..624320428403f6 100644
--- a/offload/src/interface.cpp
+++ b/offload/src/interface.cpp
@@ -39,7 +39,7 @@ using namespace llvm::omp::target::ompt;
 //
 // The return bool indicates if the offload is to the host device
 // There are three possible results:
-// - Return false if the taregt device is ready for offload
+// - Return false if the target device is ready for offload
 // - Return true without reporting a runtime error if offload is
 //   disabled, perhaps because the initial device was specified.
 // - Report a runtime error and return true.
@@ -366,8 +366,8 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
 
   int Rc = OFFLOAD_SUCCESS;
   Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
-  { // required to show syncronization
-    TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize", "", Loc);
+  { // required to show synchronization
+    TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: synchronize", "", Loc);
     if (Rc == OFFLOAD_SUCCESS)
       Rc = AsyncInfo.synchronize();
 
diff --git a/offload/src/omptarget.cpp b/offload/src/omptarget.cpp
index 89fa63347babe2..5b25d955dd320d 100644
--- a/offload/src/omptarget.cpp
+++ b/offload/src/omptarget.cpp
@@ -1409,7 +1409,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
   // API, we need the begin address itself, i.e. &A[N], as the API operates on
   // begin addresses, not bases. That's why we pass args and offsets as two
   // separate entities so that each plugin can do what it needs. This behavior
-  // was introdued via https://reviews.llvm.org/D33028 and commit 1546d319244c.
+  // was introduced via https://reviews.llvm.org/D33028 and commit 1546d319244c.
   SmallVector<void *> TgtArgs;
   SmallVector<ptrdiff_t> TgtOffsets;
 
@@ -1431,7 +1431,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
 
     // Clang might pass more values via the ArgPtrs to the runtime that we pass
     // on to the kernel.
-    // TOOD: Next time we adjust the KernelArgsTy we should introduce a new
+    // TODO: Next time we adjust the KernelArgsTy we should introduce a new
     // NumKernelArgs field.
     KernelArgs.NumArgs = TgtArgs.size();
   }
diff --git a/offload/test/api/omp_target_memcpy_async1.c b/offload/test/api/omp_target_memcpy_async1.c
index 1abcfde83dbd8c..defc7829fcdac6 100644
--- a/offload/test/api/omp_target_memcpy_async1.c
+++ b/offload/test/api/omp_target_memcpy_async1.c
@@ -1,6 +1,6 @@
 // RUN: %libomptarget-compile-and-run-generic
 
-// Test case for omp_target_memcpy_async, oringally from GCC
+// Test case for omp_target_memcpy_async, originally from GCC
 
 #include "stdio.h"
 #include <omp.h>
diff --git a/offload/test/mapping/target_uses_allocator.c b/offload/test/mapping/target_uses_allocator.c
index eb20e965c30bc9..c0d71b22733b38 100755
--- a/offload/test/mapping/target_uses_allocator.c
+++ b/offload/test/mapping/target_uses_allocator.c
@@ -54,7 +54,7 @@ int test_omp_aligned_alloc_on_device() {
 int main() {
   int errors = 0;
   if (test_omp_aligned_alloc_on_device())
-    printf("FAILE\n");
+    printf("FAILED\n");
   else
     // CHECK: PASSED
     printf("PASSED\n");
diff --git a/offload/test/offloading/fortran/dump_map_tables.f90 b/offload/test/offloading/fortran/dump_map_tables.f90
index efde4ee56ca1a0..424dec20665627 100644
--- a/offload/test/offloading/fortran/dump_map_tables.f90
+++ b/offload/test/offloading/fortran/dump_map_tables.f90
@@ -1,4 +1,4 @@
-! Offloading test with runtine call to ompx_dump_mapping_tables Fortran array
+! Offloading test with runtime call to ompx_dump_mapping_tables Fortran array
 ! writing some values and printing the variable mapped to device correctly
 ! receives the updates made on the device.
 ! REQUIRES: flang
diff --git a/offload/test/offloading/fortran/implicit-record-field-mapping.f90 b/offload/test/offloading/fortran/implicit-record-field-mapping.f90
index 77b13bed707c71..29894941c424b5 100644
--- a/offload/test/offloading/fortran/implicit-record-field-mapping.f90
+++ b/offload/test/offloading/fortran/implicit-record-field-mapping.f90
@@ -3,7 +3,7 @@
 ! REQUIRES: flang, amdgpu
 
 ! This fails only because it needs the Fortran runtime built for device. If this
-! is avaialbe, this test succeeds when run.
+! is available, this test succeeds when run.
 ! XFAIL: *
 
 ! RUN: %libomptarget-compile-fortran-generic
diff --git a/offload/test/offloading/fortran/local-descriptor-map-regress.f90 b/offload/test/offloading/fortran/local-descriptor-map-regress.f90
index e6afc4a6fb9b1a..659ca1c692b0f9 100644
--- a/offload/test/offloading/fortran/local-descriptor-map-regress.f90
+++ b/offload/test/offloading/fortran/local-descriptor-map-regress.f90
@@ -1,10 +1,10 @@
 ! Small regression test that checks that we do not cause a runtime map error in
 ! cases where we are required to allocate a local variable for the fortran
-! descriptor to store into and then load from it, done so by re-using the
+! descriptor to store into and then load from it, done so by reusing the
 ! temporary local variable across all maps related to the mapped variable and
 ! associated local variable to make sure that each map does as it is intended
 ! to do with the original data. This prevents blobs of local descriptor data
-! remaining attatched on device long after it's supposed to, which can cause
+! remaining attached on device long after it's supposed to, which can cause
! weird map issues later in subsequent function invocations. However, it
 ! doesn't avoid a user shooting themselves in the foot by mapping data via
 ! enter and then not providing a corresponding exit.
diff --git a/offload/test/offloading/fortran/target-map-derived-type-full-1.f90 b/offload/test/offloading/fortran/target-map-derived-type-full-1.f90
index 9f2aeb4bf9fc07..09a847f2cef8cb 100644
--- a/offload/test/offloading/fortran/target-map-derived-type-full-1.f90
+++ b/offload/test/offloading/fortran/target-map-derived-type-full-1.f90
@@ -1,5 +1,5 @@
 ! Offloading test checking interaction of an explicit derived type mapping when
-! mapped to target and assinging one derived type to another
+! mapped to target and assigning one derived type to another
 ! REQUIRES: flang, amdgpu
 
 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
diff --git a/offload/test/offloading/fortran/target-map-derived-type-full-implicit-1.f90 b/offload/test/offloading/fortran/target-map-derived-type-full-implicit-1.f90
index 8632b951d6fbd6..e3d82b7693865e 100644
--- a/offload/test/offloading/fortran/target-map-derived-type-full-implicit-1.f90
+++ b/offload/test/offloading/fortran/target-map-derived-type-full-implicit-1.f90
@@ -1,5 +1,5 @@
 ! Offloading test checking interaction of an implicit derived type mapping when
-! mapped to target and assinging one derived type to another
+! mapped to target and assigning one derived type to another
 ! REQUIRES: flang, amdgpu
 
 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
diff --git a/offload/test/offloading/fortran/target-map-derived-type-full-implicit-2.f90 b/offload/test/offloading/fortran/target-map-derived-type-full-implicit-2.f90
index 9331a48c3eb75e..a6cc6a548960fc 100644
--- a/offload/test/offloading/fortran/target-map-derived-type-full-implicit-2.f90
+++ b/offload/test/offloading/fortran/target-map-derived-type-full-implicit-2.f90
@@ -1,5 +1,5 @@
 ! Offloading test checking interaction of an explicit derived type mapping when
-! mapped to target and assinging one derived type to another
+! mapped to target and assigning one derived type to another
 ! REQUIRES: flang, amdgpu
 
 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
diff --git a/offload/test/offloading/parallel_target_teams_reduction_max.cpp b/offload/test/offloading/parallel_target_teams_reduction_max.cpp
index b6e39daf718c8a..ae0cf25a901788 100644
--- a/offload/test/offloading/parallel_target_teams_reduction_max.cpp
+++ b/offload/test/offloading/parallel_target_teams_reduction_max.cpp
@@ -5,7 +5,7 @@
 // REQUIRES: gpu
 
 // This test validates that the OpenMP target reductions to find a maximum work
-// as indended for a few common data types.
+// as intended for a few common data types.
 
 #include <algorithm>
 #include <cassert>
diff --git a/offload/test/offloading/parallel_target_teams_reduction_min.cpp b/offload/test/offloading/parallel_target_teams_reduction_min.cpp
index 1ab4e9868985e3..8a1610357a5069 100644
--- a/offload/test/offloading/parallel_target_teams_reduction_min.cpp
+++ b/offload/test/offloading/parallel_target_teams_reduction_min.cpp
@@ -5,7 +5,7 @@
 // REQUIRES: gpu
 
 // This test validates that the OpenMP target reductions to find a minimum work
-// as indended for a few common data types.
+// as intended for a few common data types.
 
 #include <algorithm>
 #include <cassert>
diff --git a/offload/test/offloading/struct_mapping_with_pointers.cpp b/offload/test/offloading/struct_mapping_with_pointers.cpp
index f0fde50889dace..9c1d4c67c2ee70 100644
--- a/offload/test/offloading/struct_mapping_with_pointers.cpp
+++ b/offload/test/offloading/struct_mapping_with_pointers.cpp
@@ -32,7 +32,7 @@ int main() {
   dat.datum[dat.arr[0][0]] = 0;
 
   /// The struct is mapped with type 0x0 when the pointer fields are mapped.
-  /// The struct is also map explicitely by the user. The second mapping by
+  /// The struct is also map explicitly by the user. The second mapping by
   /// the user must not overwrite the mapping set up for the pointer fields
   /// when mapping the struct happens after the mapping of the pointers.
 
diff --git a/offload/test/unified_shared_memory/associate_ptr.c b/offload/test/unified_shared_memory/associate_ptr.c
index 5f795dd42dbd32..0e9d25cd735468 100644
--- a/offload/test/unified_shared_memory/associate_ptr.c
+++ b/offload/test/unified_shared_memory/associate_ptr.c
@@ -17,7 +17,7 @@ int main(int argc, char *argv[]) {
   int rc = omp_target_associate_ptr(&x, x_dev, sizeof x, 0, dev);
   assert(!rc && "expected omp_target_associate_ptr to succeed");
 
-// To determine whether x needs to be transfered, the runtime cannot simply
+// To determine whether x needs to be transferred, the runtime cannot simply
 // check whether unified shared memory is enabled and the 'close' modifier is
 // specified.  It must check whether x was previously placed in device memory
 // by, for example, omp_target_associate_ptr.
diff --git a/offload/test/unified_shared_memory/close_member.c b/offload/test/unified_shared_memory/close_member.c
index e5a15e3d19ab63..10ec42e90b02b5 100644
--- a/offload/test/unified_shared_memory/close_member.c
+++ b/offload/test/unified_shared_memory/close_member.c
@@ -23,7 +23,7 @@ int main(int argc, char *argv[]) {
     s.x = 11;
     s.y = 21;
   }
-// To determine whether x needs to be transfered or deleted, the runtime
+// To determine whether x needs to be transferred or deleted, the runtime
 // cannot simply check whether unified shared memory is enabled and the
 // 'close' modifier is specified.  It must check whether x was previously
 // placed in device memory by, for example, a 'close' modifier that isn't
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index bec2fac50142b5..47a299f0ab5570 100644
--- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -74,7 +74,7 @@ int main(int argc, char **argv) {
   unsigned NumThreads =
       (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value());
   // TODO: Print a warning if number of teams/threads is explicitly set in the
-  // kernel info but overriden through command line options.
+  // kernel info but overridden through command line options.
   auto LoopTripCount =
       JsonKernelInfo->getAsObject()->getInteger("LoopTripCount");
   auto KernelFunc = JsonKernelInfo->getAsObject()->getString("Name");



More information about the llvm-commits mailing list