[llvm] 369adba - [NVPTX] 64-bit atom.{and,or,xor,min,max} require sm_32 or higher
Andrew Savonichev via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 14 07:08:10 PDT 2022
Author: Andrew Savonichev
Date: 2022-04-14T17:07:51+03:00
New Revision: 369adba0435e22722c6291142b2ce4265ee36ca3
URL: https://github.com/llvm/llvm-project/commit/369adba0435e22722c6291142b2ce4265ee36ca3
DIFF: https://github.com/llvm/llvm-project/commit/369adba0435e22722c6291142b2ce4265ee36ca3.diff
LOG: [NVPTX] 64-bit atom.{and,or,xor,min,max} require sm_32 or higher
Per the PTX ISA specification, section 9.7.12.4 ("Parallel Synchronization
and Communication Instructions: atom"), under "Target ISA Notes":
64-bit atom.{and,or,xor,min,max} require sm_32 or higher.
Differential Revision: https://reviews.llvm.org/D123038
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/NVPTX/atomics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 4f61171c6cad1..7589959256cd9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -156,6 +156,7 @@ def hasPTX71 : Predicate<"Subtarget->getPTXVersion() >= 71">;
def hasPTX72 : Predicate<"Subtarget->getPTXVersion() >= 72">;
def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
+def hasSM32 : Predicate<"Subtarget->getSmVersion() >= 32">;
def hasSM53 : Predicate<"Subtarget->getSmVersion() >= 53">;
def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 176ba20972af6..2fcdd98e7adee 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1658,13 +1658,13 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
- ".max", atomic_load_max_64_g, i64imm, imm>;
+ ".max", atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
- ".max", atomic_load_max_64_s, i64imm, imm>;
+ ".max", atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
- atomic_load_max_64_gen, i64imm, imm>;
+ atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
+ ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1674,13 +1674,13 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
- ".max", atomic_load_umax_64_g, i64imm, imm>;
+ ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
- ".max", atomic_load_umax_64_s, i64imm, imm>;
+ ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
- atomic_load_umax_64_gen, i64imm, imm>;
+ atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
+ ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
// atom_min
@@ -1718,13 +1718,13 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
- ".min", atomic_load_min_64_g, i64imm, imm>;
+ ".min", atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
- ".min", atomic_load_min_64_s, i64imm, imm>;
+ ".min", atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
- atomic_load_min_64_gen, i64imm, imm>;
+ atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
+ ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1734,13 +1734,13 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
- ".min", atomic_load_umin_64_g, i64imm, imm>;
+ ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
- ".min", atomic_load_umin_64_s, i64imm, imm>;
+ ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
- atomic_load_umin_64_gen, i64imm, imm>;
+ atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
+ ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
// atom_inc atom_dec
@@ -1798,13 +1798,13 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
- atomic_load_and_64_g, i64imm, imm>;
+ atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
- atomic_load_and_64_s, i64imm, imm>;
+ atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
- atomic_load_and_64_gen, i64imm, imm>;
+ atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".and", atomic_load_and_64_gen, i64imm, imm>;
+ ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
// atom_or
@@ -1830,13 +1830,13 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
- atomic_load_or_64_g, i64imm, imm>;
+ atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
- atomic_load_or_64_gen, i64imm, imm>;
+ atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".or", atomic_load_or_64_gen, i64imm, imm>;
+ ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
- atomic_load_or_64_s, i64imm, imm>;
+ atomic_load_or_64_s, i64imm, imm, [hasSM32]>;
// atom_xor
@@ -1862,13 +1862,13 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
- atomic_load_xor_64_g, i64imm, imm>;
+ atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
- atomic_load_xor_64_s, i64imm, imm>;
+ atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
- atomic_load_xor_64_gen, i64imm, imm>;
+ atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".xor", atomic_load_xor_64_gen, i64imm, imm>;
+ ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
// atom_cas
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index fd284adcb5234..982c29faaf509 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
; CHECK-LABEL: atom0
More information about the llvm-commits
mailing list