[Mlir-commits] [mlir] [mlir][AMDGPU] Add scaled floating point conversion ops (PR #141554)

Fri Jun 13 05:59:32 PDT 2025

================
@@ -1230,6 +1257,165 @@ LogicalResult ExtPackedFp8OpLowering::matchAndRewrite(
   return success();
 }
 
+LogicalResult ScaledExtPackedOpLowering::matchAndRewrite(
+    ScaledExtPackedOp op, ScaledExtPackedOpAdaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+  Location loc = op.getLoc();
+  if (chipset != kGfx950)
+    return rewriter.notifyMatchFailure(
+        loc, "Scaled fp conversion instructions are not available on target "
+             "architecture and their emulation is not implemented");
+  Type i32 = getTypeConverter()->convertType(rewriter.getI32Type());
+
+  Value source = adaptor.getSource();
+  Value scale = adaptor.getScale();
+
+  VectorType sourceVecType = cast<VectorType>(op.getSource().getType());
+  Type sourceElemType = sourceVecType.getElementType();
+  VectorType destVecType = cast<VectorType>(op.getResult().getType());
+  Type destElemType = destVecType.getElementType();
+
+  VectorType packedVecType;
+  if (isa<Float8E5M2Type, Float8E4M3FNType>(sourceElemType)) {
+    VectorType v4i8 = VectorType::get(4, rewriter.getI8Type());
+    packedVecType = cast<VectorType>(getTypeConverter()->convertType(v4i8));
+  } else if (isa<Float4E2M1FNType>(sourceElemType)) {
+    VectorType v8i4 = VectorType::get(8, rewriter.getI4Type());
+    packedVecType = cast<VectorType>(getTypeConverter()->convertType(v8i4));
+  } else {
+    llvm_unreachable("invalid element type for scaled ext");
+  }
+
+  // Extend to a packedVectorType
+  if (sourceVecType.getNumElements() < packedVecType.getNumElements()) {
+    Value longVec = rewriter.create<LLVM::ZeroOp>(loc, packedVecType);
----------------
umangyadav wrote:

It happen to notice later that for the FP8/BF8 -> FP32/FP16 we have non-packed instructions as well. 

https://github.com/llvm/llvm-project/blob/4f8187c0dc6e7a818ebf3272a0c022203f901e96/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td#L976C7-L976C28

We don't need to go through padding for those unnecessarily. 


https://github.com/llvm/llvm-project/pull/141554