[Mlir-commits] [mlir] [mlir][amdgpu] Add lowering for make_dma_descriptor (PR #169955)
Erick Ochoa Lopez
llvmlistbot at llvm.org
Tue Dec 30 09:19:09 PST 2025
================
@@ -2339,6 +2341,344 @@ struct AMDGPUMakeDmaBaseLowering
}
};
+struct AMDGPUMakeDmaDescriptorLowering
+ : public ConvertOpToLLVMPattern<MakeDmaDescriptorOp> {
+ using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
+
+ /// Builds the pattern from the LLVM type converter and records the chipset
+ /// it was created for.
+ AMDGPUMakeDmaDescriptorLowering(const LLVMTypeConverter &converter,
+ Chipset chipset)
+ : ConvertOpToLLVMPattern<MakeDmaDescriptorOp>(converter),
+ chipset(chipset) {}
+ /// Chipset passed at construction time.
+ /// NOTE(review): not read by any helper visible in this hunk; presumably
+ /// consulted by the rewrite entry point -- confirm.
+ Chipset chipset;
+
+ /// Returns descriptor group 0, which is the adaptor's `base` operand
+ /// forwarded unchanged.
+ Value getDGroup0(OpAdaptor adaptor) const {
+   Value dgroup0 = adaptor.getBase();
+   return dgroup0;
+ }
+
+ /// ORs `value` into `accumulator` after shifting it left by `shift % 32`
+ /// bits. `shift` may be given as a bit offset larger than 31; only its
+ /// position within a 32-bit word is used. No shift is emitted when that
+ /// position is 0, and no `or` is emitted when `accumulator` matches a
+ /// constant zero (the possibly shifted value is returned directly).
+ Value setValueAtOffset(ConversionPatternRewriter &rewriter, Location loc,
+                        Value accumulator, Value value, int64_t shift) const {
+   const int64_t bitInWord = shift % 32;
+   Value positioned = value;
+   if (bitInWord != 0) {
+     Value amount = createI32Constant(rewriter, loc, bitInWord);
+     positioned = LLVM::ShlOp::create(rewriter, loc, value, amount);
+   }
+
+   // Skip the `or` when there is nothing accumulated yet.
+   const bool nothingAccumulated = matchPattern(accumulator, mlir::m_Zero());
+   if (!nothingAccumulated)
+     return LLVM::OrOp::create(rewriter, loc, accumulator, positioned);
+
+   return positioned;
+ }
+
+ /// Produces an i32 constant holding the data_size field:
+ /// log2(element width in bytes), placed at bit 16.
+ /// NOTE(review): `sgpr0` and `consts` are not used; the result replaces
+ /// rather than extends `sgpr0`. Presumably this is the first field written
+ /// into an all-zero word -- confirm against the caller.
+ Value setDataSize(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                   ConversionPatternRewriter &rewriter, Location loc,
+                   Value sgpr0, ArrayRef<Value> consts) const {
+   const unsigned widthInBits = op.getElementTypeWidth();
+   assert(
+       llvm::is_contained<unsigned>({8, 16, 32, 64}, widthInBits) &&
+       "expected type width to be 8, 16, 32, or 64.");
+   const unsigned widthInBytes = widthInBits / 8;
+   const int64_t log2Bytes = llvm::Log2_32(widthInBytes);
+   const int64_t dataSizeField = log2Bytes << 16;
+   return createI32Constant(rewriter, loc, dataSizeField);
+ }
+
+ /// Sets bit 18 of `sgpr0` when the op carries an atomic barrier address;
+ /// otherwise returns `sgpr0` untouched.
+ /// `consts[1]` is the value OR'ed in (presumably the i32 constant 1).
+ Value setAtomicBarrier(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                        ConversionPatternRewriter &rewriter, Location loc,
+                        Value sgpr0, ArrayRef<Value> consts) const {
+   if (adaptor.getAtomicBarrierAddress() != nullptr)
+     return setValueAtOffset(rewriter, loc, sgpr0, consts[1], 18);
+
+   return sgpr0;
+ }
+
+ /// Sets bit 19 of `sgpr0` when the op has a global increment operand;
+ /// otherwise returns `sgpr0` untouched.
+ /// `consts[1]` is the value OR'ed in (presumably the i32 constant 1).
+ Value setIterateEnable(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                        ConversionPatternRewriter &rewriter, Location loc,
+                        Value sgpr0, ArrayRef<Value> consts) const {
+   // TODO: In future PR, add other required fields for iteration.
+   if (adaptor.getGlobalIncrement() != nullptr)
+     return setValueAtOffset(rewriter, loc, sgpr0, consts[1], 19);
+
+   return sgpr0;
+ }
+
+ /// Sets bit 20 of `sgpr0` when the op has a pad amount operand; otherwise
+ /// returns `sgpr0` untouched.
+ /// `consts[1]` is the value OR'ed in (presumably the i32 constant 1).
+ Value setPadEnable(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                    ConversionPatternRewriter &rewriter, Location loc,
+                    Value sgpr0, ArrayRef<Value> consts) const {
+   if (op.getPadAmount() != nullptr)
+     return setValueAtOffset(rewriter, loc, sgpr0, consts[1], 20);
+
+   return sgpr0;
+ }
+
+ /// Encodes the pad interval at bit 22 of `sgpr0` when padding is enabled
+ /// (the op has a pad amount); otherwise returns `sgpr0` untouched.
+ /// The encoded value is cttz(padInterval) - consts[1].
+ Value setPadInterval(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                      ConversionPatternRewriter &rewriter, Location loc,
+                      Value sgpr0, ArrayRef<Value> consts) const {
+   if (op.getPadAmount() == nullptr)
+     return sgpr0;
+
+   IntegerType i32Ty = rewriter.getI32Type();
+   // pre-condition: the interval is a power of two between 2 and 256.
+   Value log2Interval = LLVM::CountTrailingZerosOp::create(
+       rewriter, loc, i32Ty, adaptor.getPadInterval(), false);
+   // post-condition: the encoded interval is a value between 0 and 7.
+   Value encodedInterval =
+       LLVM::SubOp::create(rewriter, loc, log2Interval, consts[1]);
+   return setValueAtOffset(rewriter, loc, sgpr0, encodedInterval, 22);
+ }
+
+ /// Encodes the pad amount at bit 25 of `sgpr0` when the op has a pad amount
+ /// operand; otherwise returns `sgpr0` untouched.
+ /// The encoded value is padAmount - consts[1].
+ Value setPadAmount(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                    ConversionPatternRewriter &rewriter, Location loc,
+                    Value sgpr0, ArrayRef<Value> consts) const {
+   if (op.getPadAmount() == nullptr)
+     return sgpr0;
+
+   // pre-condition: the amount is a value between 1-128.
+   Value encodedAmount =
+       LLVM::SubOp::create(rewriter, loc, adaptor.getPadAmount(), consts[1]);
+   // post-condition: the encoded amount is a value between 0-127.
+   return setValueAtOffset(rewriter, loc, sgpr0, encodedAmount, 25);
+ }
+
+ /// When the op carries an atomic barrier address, computes the address of
+ /// the indexed barrier element, converts it to i32, shifts it right by
+ /// `consts[3]`, keeps the low 16 bits and ORs the result into `sgpr1` at
+ /// descriptor offset 32 (bit 0 of the word). Otherwise returns `sgpr1`
+ /// untouched.
+ Value setAtomicBarrierAddress(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                               ConversionPatternRewriter &rewriter,
+                               Location loc, Value sgpr1,
+                               ArrayRef<Value> consts) const {
+   Value barrierBase = adaptor.getAtomicBarrierAddress();
+   if (barrierBase == nullptr)
+     return sgpr1;
+
+   auto barrierMemRefTy =
+       cast<MemRefType>(op.getAtomicBarrierAddress().getType());
+   ValueRange barrierIndices = adaptor.getAtomicBarrierIndices();
+   Value elementPtr = getStridedElementPtr(rewriter, loc, barrierMemRefTy,
+                                           barrierBase, barrierIndices);
+
+   // pre-condition: the barrier address is aligned to 8 bytes which implies
+   // that the 3 LSBs are zero.
+   IntegerType i32Ty = rewriter.getI32Type();
+   Value addressAsInt =
+       LLVM::PtrToIntOp::create(rewriter, loc, i32Ty, elementPtr);
+   Value addressShifted =
+       LLVM::LShrOp::create(rewriter, loc, addressAsInt, consts[3]);
+   Value low16Mask = createI32Constant(rewriter, loc, 0xFFFF);
+   Value addressField =
+       LLVM::AndOp::create(rewriter, loc, addressShifted, low16Mask);
+   return setValueAtOffset(rewriter, loc, sgpr1, addressField, 32);
+ }
+
+ /// Writes the last entry of the op's mixed global sizes across two words:
+ /// the value shifted left by 16 goes into `sgpr1` (descriptor offset 48),
+ /// and the value shifted right by 16 goes into `sgpr2` (descriptor offset
+ /// 64). Returns the updated {sgpr1, sgpr2}.
+ /// NOTE(review): dynamic sizes are read from `op`, not `adaptor` --
+ /// confirm they already have the i32 type the shifts expect.
+ std::pair<Value, Value> setTensorDim0(MakeDmaDescriptorOp op,
+                                       OpAdaptor adaptor,
+                                       ConversionPatternRewriter &rewriter,
+                                       Location loc, Value sgpr1, Value sgpr2,
+                                       ArrayRef<Value> consts) const {
+   SmallVector<OpFoldResult> globalSizes = op.getMixedGlobalSizes();
+   OpFoldResult innermostSize = *globalSizes.rbegin();
+
+   // Static sizes are materialized as i32 constants; dynamic ones are used
+   // directly.
+   Value dim0;
+   if (auto staticSize = dyn_cast<Attribute>(innermostSize)) {
+     dim0 = createI32Constant(rewriter, loc,
+                              cast<IntegerAttr>(staticSize).getInt());
+   } else {
+     dim0 = cast<Value>(innermostSize);
+   }
+
+   Value sixteen = createI32Constant(rewriter, loc, 16);
+   Value dim0Upper = LLVM::LShrOp::create(rewriter, loc, dim0, sixteen);
+   Value newSgpr1 = setValueAtOffset(rewriter, loc, sgpr1, dim0, 48);
+   Value newSgpr2 = setValueAtOffset(rewriter, loc, sgpr2, dim0Upper, 48 + 16);
+   return {newSgpr1, newSgpr2};
+ }
+
+ /// Writes the second-to-last entry of the op's mixed global sizes across
+ /// two words: the value shifted left by 16 goes into `sgpr2` (descriptor
+ /// offset 80), and the value shifted right by 16 goes into `sgpr3`
+ /// (descriptor offset 96). Returns the updated {sgpr2, sgpr3}.
+ /// NOTE(review): no check that at least two global sizes exist is visible
+ /// here; presumably guaranteed by the op verifier -- confirm.
+ std::pair<Value, Value> setTensorDim1(MakeDmaDescriptorOp op,
+                                       OpAdaptor adaptor,
+                                       ConversionPatternRewriter &rewriter,
+                                       Location loc, Value sgpr2, Value sgpr3,
+                                       ArrayRef<Value> consts) const {
+   // TODO: Generalize to setTensorDimX.
+   SmallVector<OpFoldResult> globalSizes = op.getMixedGlobalSizes();
+   OpFoldResult secondInnermostSize = globalSizes[globalSizes.size() - 2];
+
+   // Static sizes are materialized as i32 constants; dynamic ones are used
+   // directly.
+   Value dim1;
+   if (auto staticSize = dyn_cast<Attribute>(secondInnermostSize)) {
+     dim1 = createI32Constant(rewriter, loc,
+                              cast<IntegerAttr>(staticSize).getInt());
+   } else {
+     dim1 = cast<Value>(secondInnermostSize);
+   }
+
+   Value sixteen = createI32Constant(rewriter, loc, 16);
+   Value dim1Upper = LLVM::LShrOp::create(rewriter, loc, dim1, sixteen);
+   Value newSgpr2 = setValueAtOffset(rewriter, loc, sgpr2, dim1, 80);
+   Value newSgpr3 = setValueAtOffset(rewriter, loc, sgpr3, dim1Upper, 80 + 16);
+   return {newSgpr2, newSgpr3};
+ }
+
+ /// ORs the `dimX`-th shared (tile) size, counted from the innermost
+ /// dimension, into `sgpr` at descriptor bit `offset`. When the op has no
+ /// such dimension, `sgpr` is returned untouched.
+ Value setTileDimX(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                   ConversionPatternRewriter &rewriter, Location loc,
+                   Value sgpr, ArrayRef<Value> consts, size_t dimX,
+                   int64_t offset) const {
+   SmallVector<OpFoldResult> sharedSizes = op.getMixedSharedSizes();
+
+   // Nothing to encode for dimensions beyond the tile's rank.
+   if (dimX >= sharedSizes.size())
+     return sgpr;
+
+   OpFoldResult tileSize = *(sharedSizes.rbegin() + dimX);
+
+   // Static sizes are materialized as i32 constants; dynamic ones are used
+   // directly.
+   Value tileDim;
+   if (auto staticSize = dyn_cast<Attribute>(tileSize)) {
+     tileDim = createI32Constant(rewriter, loc,
+                                 cast<IntegerAttr>(staticSize).getInt());
+   } else {
+     tileDim = cast<Value>(tileSize);
+   }
+
+   return setValueAtOffset(rewriter, loc, sgpr, tileDim, offset);
+ }
+
+ /// Encodes tile dimension 0 (innermost shared size) into `sgpr3` at
+ /// descriptor bit offset 112.
+ Value setTileDim0(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                   ConversionPatternRewriter &rewriter, Location loc,
+                   Value sgpr3, ArrayRef<Value> consts) const {
+   const size_t dimIndex = 0;
+   const int64_t bitOffset = 112;
+   return setTileDimX(op, adaptor, rewriter, loc, sgpr3, consts, dimIndex,
+                      bitOffset);
+ }
+
+ /// Encodes tile dimension 1 (second-innermost shared size) into `sgpr4` at
+ /// descriptor bit offset 128.
+ Value setTileDim1(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                   ConversionPatternRewriter &rewriter, Location loc,
+                   Value sgpr4, ArrayRef<Value> consts) const {
+   const size_t dimIndex = 1;
+   const int64_t bitOffset = 128;
+   return setTileDimX(op, adaptor, rewriter, loc, sgpr4, consts, dimIndex,
+                      bitOffset);
+ }
+
+ /// Encodes tile dimension 2 (third-innermost shared size) into `sgpr4` at
+ /// descriptor bit offset 144.
+ Value setTileDim2(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+                   ConversionPatternRewriter &rewriter, Location loc,
+                   Value sgpr4, ArrayRef<Value> consts) const {
+   const size_t dimIndex = 2;
+   const int64_t bitOffset = 144;
+   return setTileDimX(op, adaptor, rewriter, loc, sgpr4, consts, dimIndex,
+                      bitOffset);
+ }
+
+ std::pair<Value, Value>
+ setTensorDimXStride(MakeDmaDescriptorOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter, Location loc,
+ Value sgprY, Value sgprZ, ArrayRef<Value> consts,
+ size_t dimX, int64_t offset) const {
+ SmallVector<OpFoldResult> mixedGlobalStrides = op.getMixedGlobalStrides();
+
+ if (mixedGlobalStrides.size() <= dimX)
+ return {sgprY, sgprZ};
+
+ OpFoldResult tensorDimXStrideOpFoldResult =
+ *(mixedGlobalStrides.rbegin() + dimX);
----------------
amd-eochoalo wrote:
I got it, thanks, submitting PR...
https://github.com/llvm/llvm-project/pull/169955
More information about the Mlir-commits
mailing list