[llvm] 369adba - [NVPTX] 64-bit atom.{and,or,xor,min,max} require sm_32 or higher

Andrew Savonichev via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 14 07:08:10 PDT 2022


Author: Andrew Savonichev
Date: 2022-04-14T17:07:51+03:00
New Revision: 369adba0435e22722c6291142b2ce4265ee36ca3

URL: https://github.com/llvm/llvm-project/commit/369adba0435e22722c6291142b2ce4265ee36ca3
DIFF: https://github.com/llvm/llvm-project/commit/369adba0435e22722c6291142b2ce4265ee36ca3.diff

LOG: [NVPTX] 64-bit atom.{and,or,xor,min,max} require sm_32 or higher

PTX ISA spec, section 9.7.12.4, "Parallel Synchronization and Communication
Instructions: atom", states under "Target ISA Notes":

    64-bit atom.{and,or,xor,min,max} require sm_32 or higher.

Differential Revision: https://reviews.llvm.org/D123038

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
    llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/test/CodeGen/NVPTX/atomics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 4f61171c6cad1..7589959256cd9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -156,6 +156,7 @@ def hasPTX71 : Predicate<"Subtarget->getPTXVersion() >= 71">;
 def hasPTX72 : Predicate<"Subtarget->getPTXVersion() >= 72">;
 
 def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
+def hasSM32 : Predicate<"Subtarget->getSmVersion() >= 32">;
 def hasSM53 : Predicate<"Subtarget->getSmVersion() >= 53">;
 def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
 def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;

diff  --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 176ba20972af6..2fcdd98e7adee 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1658,13 +1658,13 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
 defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
 defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
-  ".max", atomic_load_max_64_g, i64imm, imm>;
+  ".max", atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
-  ".max", atomic_load_max_64_s, i64imm, imm>;
+  ".max", atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
-  atomic_load_max_64_gen, i64imm, imm>;
+  atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
-  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
+  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   ".max", atomic_load_umax_32_g, i32imm, imm>;
 defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1674,13 +1674,13 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
 defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
 defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
-  ".max", atomic_load_umax_64_g, i64imm, imm>;
+  ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
-  ".max", atomic_load_umax_64_s, i64imm, imm>;
+  ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
-  atomic_load_umax_64_gen, i64imm, imm>;
+  atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
-  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
+  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
 
 // atom_min
 
@@ -1718,13 +1718,13 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
 defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
 defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
-  ".min", atomic_load_min_64_g, i64imm, imm>;
+  ".min", atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
-  ".min", atomic_load_min_64_s, i64imm, imm>;
+  ".min", atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
-  atomic_load_min_64_gen, i64imm, imm>;
+  atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
-  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
+  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
   ".min", atomic_load_umin_32_g, i32imm, imm>;
 defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1734,13 +1734,13 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
 defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
   ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
 defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
-  ".min", atomic_load_umin_64_g, i64imm, imm>;
+  ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
-  ".min", atomic_load_umin_64_s, i64imm, imm>;
+  ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
-  atomic_load_umin_64_gen, i64imm, imm>;
+  atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
-  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
+  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
 
 // atom_inc  atom_dec
 
@@ -1798,13 +1798,13 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
 defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   ".and", atomic_load_and_32_gen, i32imm, imm>;
 defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
-  atomic_load_and_64_g, i64imm, imm>;
+  atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
-  atomic_load_and_64_s, i64imm, imm>;
+  atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
-  atomic_load_and_64_gen, i64imm, imm>;
+  atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
-  ".and", atomic_load_and_64_gen, i64imm, imm>;
+  ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
 
 // atom_or
 
@@ -1830,13 +1830,13 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
 defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
   atomic_load_or_32_s, i32imm, imm>;
 defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
-  atomic_load_or_64_g, i64imm, imm>;
+  atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
-  atomic_load_or_64_gen, i64imm, imm>;
+  atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
-  ".or", atomic_load_or_64_gen, i64imm, imm>;
+  ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
-  atomic_load_or_64_s, i64imm, imm>;
+  atomic_load_or_64_s, i64imm, imm, [hasSM32]>;
 
 // atom_xor
 
@@ -1862,13 +1862,13 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
 defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
   ".xor", atomic_load_xor_32_gen, i32imm, imm>;
 defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
-  atomic_load_xor_64_g, i64imm, imm>;
+  atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
-  atomic_load_xor_64_s, i64imm, imm>;
+  atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
-  atomic_load_xor_64_gen, i64imm, imm>;
+  atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
 defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
-  ".xor", atomic_load_xor_64_gen, i64imm, imm>;
+  ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
 
 // atom_cas
 

diff  --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index fd284adcb5234..982c29faaf509 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
 
 
 ; CHECK-LABEL: atom0


        


More information about the llvm-commits mailing list