[flang-commits] [flang] 475c632 - [flang][cuda] Use local scope to avoid duplicate definition (#166249)

via flang-commits flang-commits at lists.llvm.org
Mon Nov 3 14:47:54 PST 2025


Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-11-03T22:47:50Z
New Revision: 475c632b17a9f9f9ae9428c7621687e255710b7c

URL: https://github.com/llvm/llvm-project/commit/475c632b17a9f9f9ae9428c7621687e255710b7c
DIFF: https://github.com/llvm/llvm-project/commit/475c632b17a9f9f9ae9428c7621687e255710b7c.diff

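LOG: [flang][cuda] Use local scope to avoid duplicate definition (#166249)
The inline PTX emitted for barrier_try_wait and barrier_try_wait_sleep is now wrapped in a `{ ... }` block, so the `.reg .pred p` declaration is scoped to that block and is not defined twice when the sequence is emitted more than once.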

Added: 
    

Modified: 
    flang/lib/Optimizer/Builder/IntrinsicCall.cpp
    flang/test/Lower/CUDA/cuda-device-proc.cuf

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 6be4d9ce0a46c..2db0606d2bc9e 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3433,13 +3433,15 @@ IntrinsicLibrary::genBarrierTryWait(mlir::Type resultType,
   builder.setInsertionPointToStart(afterBlock);
   auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
   auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
-  mlir::Value ret =
-      mlir::NVVM::InlinePtxOp::create(
-          builder, loc, {resultType}, {barrier, args[1], ns}, {},
-          ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
-          "selp.b32 %0, 1, 0, p;",
-          {})
-          .getResult(0);
+  mlir::Value ret = mlir::NVVM::InlinePtxOp::create(
+                        builder, loc, {resultType}, {barrier, args[1], ns}, {},
+                        "{\n"
+                        "  .reg .pred p;\n"
+                        "  mbarrier.try_wait.shared.b64 p, [%1], %2, %3;\n"
+                        "  selp.b32 %0, 1, 0, p;\n"
+                        "}",
+                        {})
+                        .getResult(0);
   mlir::scf::YieldOp::create(builder, loc, ret);
   builder.setInsertionPointAfter(whileOp);
   return whileOp.getResult(0);
@@ -3454,8 +3456,11 @@ IntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType,
   auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
   return mlir::NVVM::InlinePtxOp::create(
              builder, loc, {resultType}, {barrier, args[1], args[2]}, {},
-             ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
-             "selp.b32 %0, 1, 0, p;",
+             "{\n"
+             "  .reg .pred p;\n"
+             "  mbarrier.try_wait.shared.b64 p, [%1], %2, %3;\n"
+             "  selp.b32 %0, 1, 0, p;\n"
+             "}",
              {})
       .getResult(0);
 }

diff  --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 674548b7489e8..ed015df263070 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -519,7 +519,7 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPtest_barrier_try_wait()
 ! CHECK: scf.while
-! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %{{.*}}, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %c1000000{{.*}} : !llvm.ptr, i64, i32) -> i32
+! CHECK: %{{.*}} = nvvm.inline_ptx "{\0A  .reg .pred p;\0A  mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}};\0A  selp.b32 %{{.*}}, 1, 0, p;\0A}" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
 
 attributes(global) subroutine test_barrier_try_wait_sleep()
   integer :: istat
@@ -530,7 +530,7 @@ attributes(global) subroutine test_barrier_try_wait_sleep()
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPtest_barrier_try_wait_sleep()
-! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %0, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
+! CHECK: %{{.*}} = nvvm.inline_ptx "{\0A  .reg .pred p;\0A  mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}};\0A  selp.b32 %{{.*}}, 1, 0, p;\0A}" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
 
 attributes(global) subroutine test_tma_bulk_load_c4(a, n)
   integer(8), shared :: barrier1


        


More information about the flang-commits mailing list