[llvm] [AMDGPU] Add TDM Descriptor Optimization Pass (PR #173324)
Quentin Colombet via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 22 17:56:56 PST 2025
================
@@ -0,0 +1,495 @@
+//===-- AMDGPUTDMOptimization.cpp - TDM Descriptor Optimization ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes Tensor Data Movement (TDM) descriptor creation patterns.
+// It identifies insertelement chains that create descriptors and rewrites them
+// as an alloca plus per-field updates; SROA can later promote that alloca back
+// to SSA values, which instruction selection can lower to INSERT_SUBREG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-tdm-optimization"
+
+static cl::opt<unsigned> // Hidden command-line option "amdgpu-tdm-opt-threshold"; defaults to 10.
+TDMOptBenefitThreshold("amdgpu-tdm-opt-threshold", cl::Hidden, cl::init(10),
+ cl::desc("Minimum optimization benefit threshold for TDM descriptor optimization")); // NOTE(review): no use of this option is visible in the quoted hunk -- confirm where it is consumed.
+
+namespace llvm {
+ void initializeAMDGPUTDMOptimizationPass(PassRegistry &); // Forward declaration only; the definition is not part of this excerpt.
+} // namespace llvm
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Pattern Detection Data Structures
+//===----------------------------------------------------------------------===//
+
+/// Represents a single descriptor creation pattern
+struct DescriptorPattern {
+ Type *DescType; ///< <4 x i32> or <8 x i32>
+ Value *BaseValue; ///< Base template (constant or computed)
+ SmallVector<InsertElementInst *, 8> Chain; ///< Chain of insertelement instructions
+ SmallVector<unsigned, 8> VariableFields; ///< Fields that change
+ SmallVector<unsigned, 8> ConstantFields; ///< Fields that stay constant
+ BasicBlock *Location; ///< Where the pattern is located
+ Loop *ContainingLoop; ///< Loop containing this pattern (if any)
+
+ /// Calculate field reuse ratio (constant fields / total fields)
+ float getFieldReuseRatio() const {
+ unsigned totalFields = cast<FixedVectorType>(DescType)->getNumElements();
+ return (float)ConstantFields.size() / totalFields;
+ }
+
+ /// Check if this pattern is worth optimizing
+ bool isWorthOptimizing() const {
+ // Always optimize if in loop with reuse potential
+ if (ContainingLoop && getFieldReuseRatio() >= 0.5f)
+ return true;
+
+ // Optimize if significant field reuse
+ if (getFieldReuseRatio() >= 0.75f)
+ return true;
+
+ // Optimize address descriptors (common case)
+ if (isAddressDescriptor() && ConstantFields.size() >= 1)
----------------
qcolombet wrote:
```suggestion
if (isAddressDescriptor() && !ConstantFields.empty())
```
https://github.com/llvm/llvm-project/pull/173324
More information about the llvm-commits
mailing list