[llvm] [AMDGPU][NFC] Refactor D16 folding for image samples with multiple ExtractElement+FPTrunc chains (PR #145312)

Harrison Hao via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 23 04:40:52 PDT 2025


https://github.com/harrisonGPU created https://github.com/llvm/llvm-project/pull/145312

None

>From f94083ddfdf101d825dd37df582bf1a17b3ab745 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Mon, 23 Jun 2025 15:45:17 +0800
Subject: [PATCH] [AMDGPU][NFC] Refactor D16 folding for image samples with
 multiple ExtractElement+FPTrunc chains

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 128 ++++++++++--------
 1 file changed, 69 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5477c5eae9392..171d44b5ec329 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -34,6 +34,12 @@ struct AMDGPUImageDMaskIntrinsic {
   unsigned Intr;
 };
 
+struct D16Candidate {
+  SmallVector<Instruction *, 4> InstsToErase; // Old chain, erased after fold.
+  Instruction *Replacee = nullptr; // Its uses are redirected to the fold.
+  Value *Index = nullptr; // Element index reused by the new extract.
+};
+
 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
 #include "InstCombineTables.inc"
 
@@ -150,6 +156,67 @@ static std::optional<Instruction *> modifyIntrinsicCall(
   return RetValue;
 }
 
+/// Folds an image sample into a D16 (half) return when every user is a
+/// one-use ExtractElement feeding an FPTrunc to half; otherwise std::nullopt.
+static std::optional<Instruction *>
+modifyImageIntrinsicForD16(IntrinsicInst &II,
+                           const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
+                           InstCombiner &IC) {
+  SmallVector<D16Candidate, 4> Candidates;
+
+  // Collect all (ExtractElement, FPTrunc) pairs; abort on the first mismatch.
+  for (User *U : II.users()) {
+    auto *Ext = dyn_cast<ExtractElementInst>(U);
+    if (!Ext || !Ext->hasOneUse())
+      return std::nullopt;
+
+    auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin()); // Ext's sole user.
+    if (!Tr || !Tr->getType()->getScalarType()->isHalfTy())
+      return std::nullopt;
+
+    auto &Cand = Candidates.emplace_back();
+    Cand.InstsToErase = {Tr, Ext}; // User (Tr) goes before its operand (Ext).
+    Cand.Replacee = Tr;
+    Cand.Index = Ext->getIndexOperand();
+  }
+
+  if (Candidates.empty()) // Only reachable when II has no users at all.
+    return std::nullopt;
+
+  // Build the new half-vector return type.
+  auto *VecTy = cast<VectorType>(II.getType());
+  Type *HalfVecTy = VecTy->getWithNewType(Type::getHalfTy(II.getContext()));
+
+  // Obtain the original image sample intrinsic's signature
+  // and replace its return type with the half-vector for D16 folding.
+  SmallVector<Type *, 8> SigTys;
+  Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
+  SigTys[0] = HalfVecTy;
+
+  Function *HalfDecl = Intrinsic::getOrInsertDeclaration(
+      II.getModule(), ImageDimIntr->Intr, SigTys);
+
+  II.mutateType(HalfVecTy); // Retype the existing call instead of cloning it.
+  II.setCalledFunction(HalfDecl);
+
+  // Replace each chain with a single ExtractElement from the D16 sample.
+  IRBuilder<> B(II.getContext());
+  for (auto &[Insts, Replacee, Idx] : Candidates) {
+    B.SetInsertPoint(Replacee); // New extract is inserted at the old FPTrunc.
+    auto *HalfExtract = B.CreateExtractElement(&II, Idx);
+    HalfExtract->takeName(Replacee);
+    Replacee->replaceAllUsesWith(HalfExtract);
+  }
+
+  // Erase the old chains once every FPTrunc's uses have been replaced.
+  for (auto &[Insts, Replacee, Idx] : Candidates) {
+    for (auto *I : Insts)
+      IC.eraseInstFromFunction(*I);
+  }
+
+  return &II;
+}
+
 static std::optional<Instruction *>
 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
@@ -249,65 +316,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
         }
       }
 
-      // Only perform D16 folding if every user of the image sample is
-      // an ExtractElementInst immediately followed by an FPTrunc to half.
-      SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4>
-          ExtractTruncPairs;
-      bool AllHalfExtracts = true;
-
-      for (User *U : II.users()) {
-        auto *Ext = dyn_cast<ExtractElementInst>(U);
-        if (!Ext || !Ext->hasOneUse()) {
-          AllHalfExtracts = false;
-          break;
-        }
-
-        auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin());
-        if (!Tr || !Tr->getType()->isHalfTy()) {
-          AllHalfExtracts = false;
-          break;
-        }
-
-        ExtractTruncPairs.emplace_back(Ext, Tr);
-      }
-
-      if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
-        auto *VecTy = cast<VectorType>(II.getType());
-        Type *HalfVecTy =
-            VecTy->getWithNewType(Type::getHalfTy(II.getContext()));
-
-        // Obtain the original image sample intrinsic's signature
-        // and replace its return type with the half-vector for D16 folding
-        SmallVector<Type *, 8> SigTys;
-        Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
-        SigTys[0] = HalfVecTy;
-
-        Module *M = II.getModule();
-        Function *HalfDecl =
-            Intrinsic::getOrInsertDeclaration(M, ImageDimIntr->Intr, SigTys);
-
-        II.mutateType(HalfVecTy);
-        II.setCalledFunction(HalfDecl);
-
-        IRBuilder<> Builder(II.getContext());
-        for (auto &[Ext, Tr] : ExtractTruncPairs) {
-          Value *Idx = Ext->getIndexOperand();
-
-          Builder.SetInsertPoint(Tr);
-
-          Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
-          HalfExtract->takeName(Tr);
-
-          Tr->replaceAllUsesWith(HalfExtract);
-        }
-
-        for (auto &[Ext, Tr] : ExtractTruncPairs) {
-          IC.eraseInstFromFunction(*Tr);
-          IC.eraseInstFromFunction(*Ext);
-        }
-
-        return &II;
-      }
+      if (auto FoldedII = modifyImageIntrinsicForD16(II, ImageDimIntr, IC))
+        return *FoldedII;
     }
   }
 



More information about the llvm-commits mailing list