[flang-commits] [flang] 2334330 - [flang][cuda] Imply zero offset when not provided (#189421)
via flang-commits
flang-commits at lists.llvm.org
Mon Mar 30 09:51:15 PDT 2026
Author: Valentin Clement (バレンタイン クレメン)
Date: 2026-03-30T09:51:11-07:00
New Revision: 2334330bd937ff0b42db902d9943717879f13d93
URL: https://github.com/llvm/llvm-project/commit/2334330bd937ff0b42db902d9943717879f13d93
DIFF: https://github.com/llvm/llvm-project/commit/2334330bd937ff0b42db902d9943717879f13d93.diff
LOG: [flang][cuda] Imply zero offset when not provided (#189421)
Added:
Modified:
flang/lib/Optimizer/Transforms/CUDA/CUFGPUToLLVMConversion.cpp
flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFGPUToLLVMConversion.cpp
index d5a8212eb5472..3d29454d1403a 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFGPUToLLVMConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFGPUToLLVMConversion.cpp
@@ -244,10 +244,6 @@ struct CUFSharedMemoryOpConversion
matchAndRewrite(cuf::SharedMemoryOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
mlir::Location loc = op->getLoc();
- if (!op.getOffset())
- mlir::emitError(loc,
- "cuf.shared_memory must have an offset for code gen");
-
auto gpuMod = op->getParentOfType<gpu::GPUModuleOp>();
std::string sharedGlobalName =
@@ -266,7 +262,10 @@ struct CUFSharedMemoryOpConversion
rewriter, loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()),
sharedGlobalAddr);
mlir::Type baseType = castPtr->getResultTypes().front();
- llvm::SmallVector<mlir::LLVM::GEPArg> gepArgs = {op.getOffset()};
+ mlir::LLVM::GEPArg offsetArg =
+ op.getOffset() ? mlir::LLVM::GEPArg(op.getOffset())
+ : mlir::LLVM::GEPArg(static_cast<int32_t>(0));
+ llvm::SmallVector<mlir::LLVM::GEPArg> gepArgs = {offsetArg};
mlir::Value shmemPtr = mlir::LLVM::GEPOp::create(
rewriter, loc, baseType, rewriter.getI8Type(), castPtr, gepArgs);
rewriter.replaceOp(op, {shmemPtr});
diff --git a/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir b/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir
index 69370613cd348..f1f8e0f353353 100644
--- a/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir
+++ b/flang/test/Fir/CUDA/cuda-shared-to-llvm.mlir
@@ -20,3 +20,20 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
// CHECK: %[[ADDR1:.*]] = llvm.mlir.addressof @_QPshared_static__shared_mem__ : !llvm.ptr<3>
// CHECK: %[[ADDRCAST1:.*]] = llvm.addrspacecast %[[ADDR1]] : !llvm.ptr<3> to !llvm.ptr
// CHECK: %[[B:.*]] = llvm.getelementptr %[[ADDRCAST1]][%c4{{.*}}] : (!llvm.ptr, i32) -> !llvm.ptr, i8
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+ gpu.module @cuda_device_mod {
+ llvm.func @_QPshared_static() {
+ %0 = cuf.shared_memory i32 {bindc_name = "a", uniq_name = "_QFshared_staticEa"} -> !fir.ref<i32>
+ llvm.return
+ }
+ llvm.mlir.global common @_QPshared_static__shared_mem__(dense<0> : vector<28xi8>) {addr_space = 3 : i32, alignment = 8 : i64} : !llvm.array<28 x i8>
+ }
+}
+
+// CHECK-LABEL: llvm.func @_QPshared_static()
+// CHECK: %[[ADDR0:.*]] = llvm.mlir.addressof @_QPshared_static__shared_mem__ : !llvm.ptr<3>
+// CHECK: %[[ADDRCAST0:.*]] = llvm.addrspacecast %[[ADDR0]] : !llvm.ptr<3> to !llvm.ptr
+// CHECK: %[[A:.*]] = llvm.getelementptr %[[ADDRCAST0]][0] : (!llvm.ptr) -> !llvm.ptr, i8
More information about the flang-commits mailing list