[llvm] [AMDGPU] Support merging 16-bit TBUFFER load/store instruction (PR #145078)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 09:03:07 PDT 2025


================
@@ -1040,32 +1048,55 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
   if (CI.Offset == Paired.Offset)
     return false;
 
+  unsigned EltSize = CI.EltSize;
+
   // This won't be valid if the offset isn't aligned.
-  if ((CI.Offset % CI.EltSize != 0) || (Paired.Offset % CI.EltSize != 0))
+  if ((CI.Offset % EltSize != 0) || (Paired.Offset % EltSize != 0))
     return false;
 
   if (CI.InstClass == TBUFFER_LOAD || CI.InstClass == TBUFFER_STORE) {
-
-    const llvm::AMDGPU::GcnBufferFormatInfo *Info0 =
-        llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format, STI);
+    const AMDGPU::GcnBufferFormatInfo *Info0 =
+        AMDGPU::getGcnBufferFormatInfo(CI.Format, STI);
     if (!Info0)
       return false;
-    const llvm::AMDGPU::GcnBufferFormatInfo *Info1 =
-        llvm::AMDGPU::getGcnBufferFormatInfo(Paired.Format, STI);
+    const AMDGPU::GcnBufferFormatInfo *Info1 =
+        AMDGPU::getGcnBufferFormatInfo(Paired.Format, STI);
     if (!Info1)
       return false;
 
     if (Info0->BitsPerComp != Info1->BitsPerComp ||
         Info0->NumFormat != Info1->NumFormat)
       return false;
 
-    // TODO: Should be possible to support more formats, but if format loads
-    // are not dword-aligned, the merged load might not be valid.
-    if (Info0->BitsPerComp != 32)
+    // Buffer instructions support up to 4 components per access (e.g., x, xy,
+    // xyz, xyzw).
+    unsigned NumCombinedComponents = CI.Width + Paired.Width;
+    if (NumCombinedComponents > 4)
+      return false;
----------------
jayfoad wrote:

This `NumCombinedComponents > 4` check isn't needed; that case is already handled inside `getBufferFormatWithCompCount`.

https://github.com/llvm/llvm-project/pull/145078


More information about the llvm-commits mailing list