[llvm] [NVPTX] Support fence instruction (PR #99649)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 21 05:42:11 PDT 2024
https://github.com/gonzalobg updated https://github.com/llvm/llvm-project/pull/99649
>From 1a395c1635434b1153907e3ce28e52f579e1373a Mon Sep 17 00:00:00 2001
From: Gonzalo Brito Gadeschi <gonzalob at nvidia.com>
Date: Mon, 15 Jul 2024 12:23:44 -0700
Subject: [PATCH 1/3] [NVPTX] Support fence instruction
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 29 ++++++++++++++++++++
llvm/test/CodeGen/NVPTX/fence.ll | 36 +++++++++++++++++++++++++
2 files changed, 65 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/fence.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 7f1ac8688007e..c5146e9f33088 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3908,3 +3908,32 @@ def : Pat <
(V2I32toI64
(INT_NVVM_PRMT (I64toI32H Int64Regs:$a), (i32 0), (i32 0x0123)),
(INT_NVVM_PRMT (I64toI32L Int64Regs:$a), (i32 0), (i32 0x0123)))>;
+
+
+////////////////////////////////////////////////////////////////////////////////
+// PTX Fence instructions
+////////////////////////////////////////////////////////////////////////////////
+
+def atomic_thread_fence_seq_cst_sys :
+ NVPTXInst<(outs), (ins), "fence.sc.sys;", []>,
+ Requires<[hasPTX<60>, hasSM<70>]>;
+def atomic_thread_fence_acq_rel_sys :
+ NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
+ Requires<[hasPTX<60>, hasSM<70>]>;
+
+def atomic_thread_fence_seq_cst_sys_membar :
+ NVPTXInst<(outs), (ins), "membar.sys;", []>;
+
+def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acquire(4) sys(1)
+ Requires<[hasPTX<60>, hasSM<70>]>;
+def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // release(5) sys(1)
+ Requires<[hasPTX<60>, hasSM<70>]>;
+def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acq_rel(6) sys(1)
+ Requires<[hasPTX<60>, hasSM<70>]>;
+def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
+ Requires<[hasPTX<60>, hasSM<70>]>;
+
+def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acquire(4) sys(1)
+def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // release(5) sys(1)
+def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acq_rel(6) sys(1)
+def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // seq_cst(7) sys(1)
diff --git a/llvm/test/CodeGen/NVPTX/fence.ll b/llvm/test/CodeGen/NVPTX/fence.ll
new file mode 100644
index 0000000000000..d3aace95e9665
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fence.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,SM60
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | FileCheck %s --check-prefixes=CHECK,SM70
+; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx60 | %ptxas-verify -arch=sm_70 %}
+
+; CHECK-LABEL: fence_sc_sys
+define void @fence_sc_sys() local_unnamed_addr {
+ ; SM60: membar.sys
+ ; SM70: fence.sc.sys
+ fence seq_cst
+ ret void
+}
+
+; CHECK-LABEL: fence_acq_rel_sys
+define void @fence_acq_rel_sys() local_unnamed_addr {
+ ; SM60: membar.sys
+ ; SM70: fence.acq_rel.sys
+ fence acq_rel
+ ret void
+}
+
+; CHECK-LABEL: fence_release_sys
+define void @fence_release_sys() local_unnamed_addr {
+ ; SM60: membar.sys
+ ; SM70: fence.acq_rel.sys
+ fence release
+ ret void
+}
+
+; CHECK-LABEL: fence_acquire_sys
+define void @fence_acquire_sys() local_unnamed_addr {
+ ; SM60: membar.sys
+ ; SM70: fence.acq_rel.sys
+ fence acquire
+ ret void
+}
\ No newline at end of file
>From 021fd6df55fa154e22e5adf519b187b1387ad2c5 Mon Sep 17 00:00:00 2001
From: Gonzalo Brito Gadeschi <gonzalob at nvidia.com>
Date: Tue, 16 Jul 2024 07:00:13 -0700
Subject: [PATCH 2/3] Fixup: use INT_MEMBAR_SYS instead of redefining the
membar.sys opcode
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index c5146e9f33088..b685949c5eb79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3921,9 +3921,6 @@ def atomic_thread_fence_acq_rel_sys :
NVPTXInst<(outs), (ins), "fence.acq_rel.sys;", []>,
Requires<[hasPTX<60>, hasSM<70>]>;
-def atomic_thread_fence_seq_cst_sys_membar :
- NVPTXInst<(outs), (ins), "membar.sys;", []>;
-
def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // acquire(4) sys(1)
Requires<[hasPTX<60>, hasSM<70>]>;
def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, // release(5) sys(1)
@@ -3933,7 +3930,7 @@ def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, /
def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
Requires<[hasPTX<60>, hasSM<70>]>;
-def : Pat<(atomic_fence (i64 4), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acquire(4) sys(1)
-def : Pat<(atomic_fence (i64 5), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // release(5) sys(1)
-def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // acq_rel(6) sys(1)
-def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys_membar)>; // seq_cst(7) sys(1)
+def : Pat<(atomic_fence (i64 4), (i64 1)), (INT_MEMBAR_SYS)>; // acquire(4) sys(1)
+def : Pat<(atomic_fence (i64 5), (i64 1)), (INT_MEMBAR_SYS)>; // release(5) sys(1)
+def : Pat<(atomic_fence (i64 6), (i64 1)), (INT_MEMBAR_SYS)>; // acq_rel(6) sys(1)
+def : Pat<(atomic_fence (i64 7), (i64 1)), (INT_MEMBAR_SYS)>; // seq_cst(7) sys(1)
>From ba4d6ec183c4197f3157e05ffc83b9283863169a Mon Sep 17 00:00:00 2001
From: gonzalobg <65027571+gonzalobg at users.noreply.github.com>
Date: Fri, 19 Jul 2024 23:57:15 +0200
Subject: [PATCH 3/3] [NVPTX] Add comment to clarify pre sm70 behavior
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b685949c5eb79..91b239a52d17f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3930,6 +3930,8 @@ def : Pat<(atomic_fence (i64 6), (i64 1)), (atomic_thread_fence_acq_rel_sys)>, /
def : Pat<(atomic_fence (i64 7), (i64 1)), (atomic_thread_fence_seq_cst_sys)>, // seq_cst(7) sys(1)
Requires<[hasPTX<60>, hasSM<70>]>;
+
+// If PTX<60 or SM<70, we fall back to MEMBAR:
def : Pat<(atomic_fence (i64 4), (i64 1)), (INT_MEMBAR_SYS)>; // acquire(4) sys(1)
def : Pat<(atomic_fence (i64 5), (i64 1)), (INT_MEMBAR_SYS)>; // release(5) sys(1)
def : Pat<(atomic_fence (i64 6), (i64 1)), (INT_MEMBAR_SYS)>; // acq_rel(6) sys(1)
More information about the llvm-commits
mailing list