[Mlir-commits] [mlir] [mlir][AMDGPU] Support vector<2xf16> inputs to buffer atomic fadd (PR #108238)

Wed Sep 11 10:25:39 PDT 2024

================
@@ -115,15 +115,18 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern<GpuOp> {
             rewriter.getIntegerType(floatType.getWidth()));
     }
     if (auto dataVector = dyn_cast<VectorType>(wantedDataType)) {
+      uint32_t vecLen = dataVector.getNumElements();
       uint32_t elemBits = dataVector.getElementTypeBitWidth();
-      uint32_t totalBits = elemBits * dataVector.getNumElements();
+      uint32_t totalBits = elemBits * vecLen;
+      bool usePackedFp16 =
+          dyn_cast_or_null<RawBufferAtomicFaddOp>(*gpuOp) && vecLen == 2;
       if (totalBits > maxVectorOpWidth)
         return gpuOp.emitOpError(
             "Total width of loads or stores must be no more than " +
             Twine(maxVectorOpWidth) + " bits, but we call for " +
             Twine(totalBits) +
             " bits. This should've been caught in validation");
-      if (elemBits < 32) {
+      else if (!usePackedFp16 && elemBits < 32) {
----------------
krzysz00 wrote:

... I thought I saw an approval, reverting, thanks

https://github.com/llvm/llvm-project/pull/108238