[flang-commits] [flang] [flang][cuda] Fix predefined variable processing with inlining (PR #205888)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Thu Jun 25 12:50:35 PDT 2026
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/205888
>From 7df59087d4c6809f13ca7ad86c558e1c22099e9a Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 25 Jun 2026 12:11:46 -0700
Subject: [PATCH 1/2] [flang][cuda] Fix predefined variable processing with
inlining
---
.../Transforms/CUDA/CUFPredefinedVarToGPU.cpp | 24 ++++++-----
flang/test/Fir/CUDA/predefined-variables.mlir | 41 +++++++++++++++++++
2 files changed, 54 insertions(+), 11 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFPredefinedVarToGPU.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFPredefinedVarToGPU.cpp
index ce97b56fe3497..901f15d792173 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFPredefinedVarToGPU.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFPredefinedVarToGPU.cpp
@@ -55,24 +55,23 @@ std::string mangleBuiltin(llvm::StringRef varName) {
varName.str();
}
-static void processCoordinateOp(mlir::OpBuilder &builder, mlir::Location loc,
- fir::CoordinateOp coordOp, unsigned fieldIdx,
- mlir::Value &gpuValue) {
+static void
+processCoordinateOp(mlir::OpBuilder &builder, mlir::Location loc,
+ fir::CoordinateOp coordOp, unsigned fieldIdx,
+ mlir::Value &gpuValue,
+ llvm::SmallVectorImpl<mlir::Operation *> &opsToDelete) {
std::optional<llvm::ArrayRef<int32_t>> fieldIndices =
coordOp.getFieldIndices();
assert(fieldIndices && fieldIndices->size() == 1 &&
"expect only one coordinate");
if (static_cast<unsigned>((*fieldIndices)[0]) == fieldIdx) {
- llvm::SmallVector<fir::LoadOp> opToErase;
for (mlir::OpOperand &coordUse : coordOp.getResult().getUses()) {
assert(mlir::isa<fir::LoadOp>(coordUse.getOwner()) &&
"only expect load op");
auto loadOp = mlir::dyn_cast<fir::LoadOp>(coordUse.getOwner());
loadOp.getResult().replaceAllUsesWith(gpuValue);
- opToErase.push_back(loadOp);
+ opsToDelete.push_back(loadOp);
}
- for (auto op : opToErase)
- op.erase();
}
}
@@ -85,9 +84,12 @@ processDeclareOp(mlir::OpBuilder &builder, mlir::Location loc,
for (mlir::OpOperand &use : declareOp.getResult().getUses()) {
fir::CoordinateOp coordOp =
mlir::dyn_cast<fir::CoordinateOp>(use.getOwner());
- processCoordinateOp(builder, loc, coordOp, field_x, gpuValues[0]);
- processCoordinateOp(builder, loc, coordOp, field_y, gpuValues[1]);
- processCoordinateOp(builder, loc, coordOp, field_z, gpuValues[2]);
+ processCoordinateOp(builder, loc, coordOp, field_x, gpuValues[0],
+ opsToDelete);
+ processCoordinateOp(builder, loc, coordOp, field_y, gpuValues[1],
+ opsToDelete);
+ processCoordinateOp(builder, loc, coordOp, field_z, gpuValues[2],
+ opsToDelete);
opsToDelete.push_back(coordOp);
}
opsToDelete.push_back(declareOp.getOperation());
@@ -143,7 +145,7 @@ struct CUFPredefinedVarToGPU
griddims, opsToDelete);
});
- for (auto op : opsToDelete)
+ for (auto *op : opsToDelete)
op->erase();
}
}
diff --git a/flang/test/Fir/CUDA/predefined-variables.mlir b/flang/test/Fir/CUDA/predefined-variables.mlir
index 6c9d080b00ee8..d223360a7d37b 100644
--- a/flang/test/Fir/CUDA/predefined-variables.mlir
+++ b/flang/test/Fir/CUDA/predefined-variables.mlir
@@ -234,3 +234,44 @@ func.func @_QMbarPgfoo2(%arg0: !fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>,
// CHECK: fir.store %[[ADD]] to %{{.*}} : !fir.ref<i32>
// CHECK: fir.if
// CHECK: fir.store %[[ADD]] to %{{.*}} : !fir.ref<i32>
+
+// -----
+
+func.func @surviving_predefined_vars(%arg0: i32, %arg1: i32, %arg2: i32) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c4 = arith.constant 4 : index
+ %cst = arith.constant 0.000000e+00 : f64
+ %i0 = arith.constant 0 : i32
+ %i1 = arith.constant 1 : i32
+
+ %v0 = fir.alloca i32 {uniq_name = "_QFminiEi"}
+ %v1 = fir.alloca i32 {uniq_name = "_QFminiEj"}
+ %sum = fir.alloca f64 {uniq_name = "_QFminiEsum"}
+ %sum_d = fir.declare %sum {uniq_name = "_QFminiEsum"} : (!fir.ref<f64>) -> !fir.ref<f64>
+
+ cuf.kernel<<<*, *>>> (%iv0 : index, %iv1 : index, %iv2 : index) = (%c0, %c0, %c0 : index, index, index) to (%c1, %c1, %c1 : index, index, index) step (%c1, %c1, %c1 : index, index, index) {
+ %dim = fir.address_of(@_QM__fortran_builtinsE__builtin_blockdim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %bid = fir.address_of(@_QM__fortran_builtinsE__builtin_blockidx) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %gdim = fir.address_of(@_QM__fortran_builtinsE__builtin_griddim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %tid = fir.address_of(@_QM__fortran_builtinsE__builtin_threadidx) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+
+ %d_dim = fir.declare %dim {uniq_name = "_QM__fortran_builtinsE__builtin_blockdim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %d_bid = fir.declare %bid {uniq_name = "_QM__fortran_builtinsE__builtin_blockidx"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %d_gdim = fir.declare %gdim {uniq_name = "_QM__fortran_builtinsE__builtin_griddim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %d_tid = fir.declare %tid {uniq_name = "_QM__fortran_builtinsE__builtin_threadidx"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+
+ fir.store %i0 to %v0 : !fir.ref<i32>
+ fir.store %i1 to %v1 : !fir.ref<i32>
+ fir.store %cst to %sum_d : !fir.ref<f64>
+ "fir.end"() : () -> ()
+ } {n = 3 : i64}
+
+ return
+}
+
+// CHECK-LABEL: surviving_predefined_vars
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_blockdim
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_blockidx
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_griddim
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_threadidx
>From 51e76e1244288501f84213d93d06dfd2458ccfdb Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 25 Jun 2026 12:50:22 -0700
Subject: [PATCH 2/2] Add test
---
flang/test/Fir/CUDA/predefined-variables.mlir | 42 +++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/flang/test/Fir/CUDA/predefined-variables.mlir b/flang/test/Fir/CUDA/predefined-variables.mlir
index d223360a7d37b..90b72f3bc1bcb 100644
--- a/flang/test/Fir/CUDA/predefined-variables.mlir
+++ b/flang/test/Fir/CUDA/predefined-variables.mlir
@@ -275,3 +275,45 @@ func.func @surviving_predefined_vars(%arg0: i32, %arg1: i32, %arg2: i32) attribu
// CHECK-NOT: _QM__fortran_builtinsE__builtin_blockidx
// CHECK-NOT: _QM__fortran_builtinsE__builtin_griddim
// CHECK-NOT: _QM__fortran_builtinsE__builtin_threadidx
+
+
+// -----
+
+func.func @surviving_predefined_vars(%arg0: i32, %arg1: i32, %arg2: i32) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c4 = arith.constant 4 : index
+ %cst = arith.constant 0.000000e+00 : f64
+ %i0 = arith.constant 0 : i32
+ %i1 = arith.constant 1 : i32
+
+ %v0 = fir.alloca i32 {uniq_name = "_QFminiEi"}
+ %v1 = fir.alloca i32 {uniq_name = "_QFminiEj"}
+ %sum = fir.alloca f64 {uniq_name = "_QFminiEsum"}
+ %sum_d = fir.declare %sum {uniq_name = "_QFminiEsum"} : (!fir.ref<f64>) -> !fir.ref<f64>
+
+ cuf.kernel<<<*, *>>> (%iv0 : index, %iv1 : index, %iv2 : index) = (%c0, %c0, %c0 : index, index, index) to (%c1, %c1, %c1 : index, index, index) step (%c1, %c1, %c1 : index, index, index) {
+ %dim = fir.address_of(@_QM__fortran_builtinsE__builtin_blockdim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %bid = fir.address_of(@_QM__fortran_builtinsE__builtin_blockidx) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %gdim = fir.address_of(@_QM__fortran_builtinsE__builtin_griddim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %tid = fir.address_of(@_QM__fortran_builtinsE__builtin_threadidx) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+
+ %d_dim = fir.declare %dim {uniq_name = "_QM__fortran_builtinsE__builtin_blockdim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %d_bid = fir.declare %bid {uniq_name = "_QM__fortran_builtinsE__builtin_blockidx"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %d_gdim = fir.declare %gdim {uniq_name = "_QM__fortran_builtinsE__builtin_griddim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+ %d_tid = fir.declare %tid {uniq_name = "_QM__fortran_builtinsE__builtin_threadidx"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
+
+ %0 = fir.coordinate_of %d_tid, x : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
+ %1 = fir.load %0 : !fir.ref<i32>
+ fir.store %1 to %v0 : !fir.ref<i32>
+ "fir.end"() : () -> ()
+ } {n = 3 : i64}
+
+ return
+}
+
+// CHECK-LABEL: surviving_predefined_vars
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_blockdim
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_blockidx
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_griddim
+// CHECK-NOT: _QM__fortran_builtinsE__builtin_threadidx
More information about the flang-commits
mailing list