[Mlir-commits] [mlir] 4cba595 - [mlir][ROCDL] Set the LLVM data layout when lowering to ROCDL LLVM (#74501)

Tue Feb 27 07:59:54 PST 2024

Author: Krzysztof Drewniak
Date: 2024-02-27T09:59:50-06:00
New Revision: 4cba5957e6ffdbeb44174d32da892ad09b3eed88

URL: https://github.com/llvm/llvm-project/commit/4cba5957e6ffdbeb44174d32da892ad09b3eed88
DIFF: https://github.com/llvm/llvm-project/commit/4cba5957e6ffdbeb44174d32da892ad09b3eed88.diff

LOG: [mlir][ROCDL] Set the LLVM data layout when lowering to ROCDL LLVM (#74501)

In order to ensure operations lower correctly (especially
memref.addrspacecast, which relies on the data layout being set
correctly when dealing with dynamic memrefs) and to prevent compilation
issues later down the line, set the `llvm.data_layout` attribute on GPU
modules when lowering their contents to a ROCDL / AMDGPU target.

If there's a good way to test the embedded string to prevent it from
going out of sync with the LLVM TargetMachine, I'd appreciate hearing
about it. (Or, alternatively, if there's a place I could factor the
string out to).

Added: 
    

Modified: 
    mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
    mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 4fa3cdcbf85ce2..f425b1f59d9940 100644

--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -75,6 +75,11 @@ Value getLaneId(ConversionPatternRewriter &rewriter, Location loc,
                                                    ValueRange{minus1, mbcntLo});
   return laneId;
 }
+static constexpr StringLiteral amdgcnDataLayout =
+    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+    "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:"
+    "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
+    "G1-ni:7:8";
 
 namespace {
 struct GPULaneIdOpToROCDL : ConvertOpToLLVMPattern<gpu::LaneIdOp> {
@@ -212,6 +217,12 @@ struct LowerGpuOpsToROCDLOpsPass
     gpu::GPUModuleOp m = getOperation();
     MLIRContext *ctx = m.getContext();
 
+    auto llvmDataLayout = m->getAttrOfType<StringAttr>(
+        LLVM::LLVMDialect::getDataLayoutAttrName());
+    if (!llvmDataLayout) {
+      llvmDataLayout = StringAttr::get(ctx, amdgcnDataLayout);
+      m->setAttr(LLVM::LLVMDialect::getDataLayoutAttrName(), llvmDataLayout);
+    }
     // Request C wrapper emission.
     for (auto func : m.getOps<func::FuncOp>()) {
       func->setAttr(LLVM::LLVMDialect::getEmitCWrapperAttrName(),
@@ -227,6 +238,7 @@ struct LowerGpuOpsToROCDLOpsPass
     /// Customize the bitwidth used for the device side index computations.
     LowerToLLVMOptions options(
         ctx, DataLayout(cast<DataLayoutOpInterface>(m.getOperation())));
+    options.dataLayout = llvm::DataLayout(llvmDataLayout.getValue());
     if (indexBitwidth != kDeriveIndexBitwidthFromDataLayout)
       options.overrideIndexBitwidth(indexBitwidth);
 

diff  --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index 2652b866570996..8a2d8bd7967caf 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -1,6 +1,8 @@
 // RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s
 // RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s
 
+// CHECK-LABEL: @test_module
+// CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
 gpu.module @test_module {
   // CHECK-LABEL: func @gpu_index_ops()
   // CHECK32-LABEL: func @gpu_index_ops()
@@ -628,3 +630,11 @@ gpu.module @test_module {
     func.return %shfl, %shfli : f32, f32
   }
 }
+
+// -----
+
+// CHECK-LABEL: @test_custom_data_layout
+// CHECK-SAME: llvm.data_layout = "e"
+gpu.module @test_custom_data_layout attributes {llvm.data_layout = "e"} {
+
+}