[flang-commits] [flang] 475c632 - [flang][cuda] Use local scope to avoid duplicate definition (#166249)
via flang-commits
flang-commits at lists.llvm.org
Mon Nov 3 14:47:54 PST 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-11-03T22:47:50Z
New Revision: 475c632b17a9f9f9ae9428c7621687e255710b7c
URL: https://github.com/llvm/llvm-project/commit/475c632b17a9f9f9ae9428c7621687e255710b7c
DIFF: https://github.com/llvm/llvm-project/commit/475c632b17a9f9f9ae9428c7621687e255710b7c.diff
LOG: [flang][cuda] Use local scope to avoid duplicate definition (#166249)
Added:
Modified:
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/test/Lower/CUDA/cuda-device-proc.cuf
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 6be4d9ce0a46c..2db0606d2bc9e 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3433,13 +3433,15 @@ IntrinsicLibrary::genBarrierTryWait(mlir::Type resultType,
builder.setInsertionPointToStart(afterBlock);
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
- mlir::Value ret =
- mlir::NVVM::InlinePtxOp::create(
- builder, loc, {resultType}, {barrier, args[1], ns}, {},
- ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
- "selp.b32 %0, 1, 0, p;",
- {})
- .getResult(0);
+ mlir::Value ret = mlir::NVVM::InlinePtxOp::create(
+ builder, loc, {resultType}, {barrier, args[1], ns}, {},
+ "{\n"
+ " .reg .pred p;\n"
+ " mbarrier.try_wait.shared.b64 p, [%1], %2, %3;\n"
+ " selp.b32 %0, 1, 0, p;\n"
+ "}",
+ {})
+ .getResult(0);
mlir::scf::YieldOp::create(builder, loc, ret);
builder.setInsertionPointAfter(whileOp);
return whileOp.getResult(0);
@@ -3454,8 +3456,11 @@ IntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType,
auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
return mlir::NVVM::InlinePtxOp::create(
builder, loc, {resultType}, {barrier, args[1], args[2]}, {},
- ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
- "selp.b32 %0, 1, 0, p;",
+ "{\n"
+ " .reg .pred p;\n"
+ " mbarrier.try_wait.shared.b64 p, [%1], %2, %3;\n"
+ " selp.b32 %0, 1, 0, p;\n"
+ "}",
{})
.getResult(0);
}
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 674548b7489e8..ed015df263070 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -519,7 +519,7 @@ end subroutine
! CHECK-LABEL: func.func @_QPtest_barrier_try_wait()
! CHECK: scf.while
-! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %{{.*}}, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %c1000000{{.*}} : !llvm.ptr, i64, i32) -> i32
+! CHECK: %{{.*}} = nvvm.inline_ptx "{\0A .reg .pred p;\0A mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}};\0A selp.b32 %{{.*}}, 1, 0, p;\0A}" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
attributes(global) subroutine test_barrier_try_wait_sleep()
integer :: istat
@@ -530,7 +530,7 @@ attributes(global) subroutine test_barrier_try_wait_sleep()
end subroutine
! CHECK-LABEL: func.func @_QPtest_barrier_try_wait_sleep()
-! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %0, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
+! CHECK: %{{.*}} = nvvm.inline_ptx "{\0A .reg .pred p;\0A mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}};\0A selp.b32 %{{.*}}, 1, 0, p;\0A}" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
attributes(global) subroutine test_tma_bulk_load_c4(a, n)
integer(8), shared :: barrier1
More information about the flang-commits mailing list