[llvm] [NVPTX] Cleanup and refactor atomic lowering (PR #133781)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 16:21:55 PDT 2025
================
@@ -1975,703 +1975,237 @@ def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$
// Atomic Functions
//-----------------------------------
-class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, AS_match.global>;
-class ATOMIC_SHARED_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, AS_match.shared>;
-class ATOMIC_GENERIC_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, AS_match.generic>;
-
-multiclass F_ATOMIC_2<
- ValueType regT, NVPTXRegClass regclass,
- string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
- Operand IMMType, SDNode IMM, list<Predicate> Pred = []> {
- let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
- def r : NVPTXInst<(outs regclass:$dst), (ins ADDR:$addr, regclass:$b),
- "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
- [(set (regT regclass:$dst), (IntOp addr:$addr, (regT regclass:$b)))]>,
- Requires<Pred>;
- if !not(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16"))) then
- def i : NVPTXInst<(outs regclass:$dst), (ins ADDR:$addr, IMMType:$b),
- "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
- [(set (regT regclass:$dst), (IntOp addr:$addr, IMM:$b))]>,
- Requires<Pred>;
- }
-}
+class ATOMIC_GLOBAL_CHK <dag frag>
+ : PatFrag<!setdagop(frag, ops), frag, AS_match.global>;
+class ATOMIC_SHARED_CHK <dag frag>
+ : PatFrag<!setdagop(frag, ops), frag, AS_match.shared>;
+class ATOMIC_GENERIC_CHK <dag frag>
+ : PatFrag<!setdagop(frag, ops), frag, AS_match.generic>;
+
-// has 2 operands, neg the second one
-multiclass F_ATOMIC_2_NEG<
- ValueType regT, NVPTXRegClass regclass,
- string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
- list<Predicate> Pred = []> {
+multiclass F_ATOMIC_2<RegTyInfo t, string sem_str, string as_str, string op_str,
+ SDPatternOperator op, list<Predicate> preds> {
+ defvar asm_str = "atom" # sem_str # as_str # "." # op_str # " \t$dst, [$addr], $b;";
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
- def reg : NVPTXInst<(outs regclass:$dst), (ins ADDR:$addr, regclass:$b),
- !strconcat(
- "{{ \n\t",
- ".reg \t.s", TypeStr, " temp; \n\t",
- "neg.s", TypeStr, " \ttemp, $b; \n\t",
- "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
- "}}"),
- [(set (regT regclass:$dst), (IntOp addr:$addr, (regT regclass:$b)))]>,
- Requires<Pred>;
+ def r : NVPTXInst<(outs t.RC:$dst), (ins ADDR:$addr, t.RC:$b),
+ asm_str,
+ [(set t.Ty:$dst, (op addr:$addr, t.Ty:$b))]>,
+ Requires<preds>;
+ if t.SupportsImm then
+ def i : NVPTXInst<(outs t.RC:$dst), (ins ADDR:$addr, t.Imm:$b),
+ asm_str,
+ [(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b)))]>,
+ Requires<preds>;
}
}
// has 3 operands
-multiclass F_ATOMIC_3<
- ValueType regT, NVPTXRegClass regclass, string SemStr,
- string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
- Operand IMMType, list<Predicate> Pred = []> {
+multiclass F_ATOMIC_3<RegTyInfo t, string sem_str, string as_str, string op_str,
+ SDPatternOperator op, list<Predicate> preds> {
+ defvar asm_str = "atom" # sem_str # as_str # "." # op_str # " \t$dst, [$addr], $b, $c;";
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
- def rr : NVPTXInst<(outs regclass:$dst),
- (ins ADDR:$addr, regclass:$b, regclass:$c),
- "atom" # SemStr # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
- [(set (regT regclass:$dst), (IntOp addr:$addr, regT:$b, regT:$c))]>,
- Requires<Pred>;
-
- def ir : NVPTXInst<(outs regclass:$dst),
- (ins ADDR:$addr, IMMType:$b, regclass:$c),
- "atom" # SemStr # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
- [(set (regT regclass:$dst), (IntOp addr:$addr, imm:$b, regT:$c))]>,
- Requires<Pred>;
-
- def ri : NVPTXInst<(outs regclass:$dst),
- (ins ADDR:$addr, regclass:$b, IMMType:$c),
- "atom" # SemStr # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
- [(set (regT regclass:$dst), (IntOp addr:$addr, regT:$b, imm:$c))]>,
- Requires<Pred>;
-
- def ii : NVPTXInst<(outs regclass:$dst),
- (ins ADDR:$addr, IMMType:$b, IMMType:$c),
- "atom" # SemStr # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
- [(set (regT regclass:$dst), (IntOp addr:$addr, imm:$b, imm:$c))]>,
- Requires<Pred>;
+ def rr : NVPTXInst<(outs t.RC:$dst),
+ (ins ADDR:$addr, t.RC:$b, t.RC:$c),
+ asm_str,
+ [(set t.Ty:$dst, (op addr:$addr, t.Ty:$b, t.Ty:$c))]>,
+ Requires<preds>;
+
+ def ir : NVPTXInst<(outs t.RC:$dst),
+ (ins ADDR:$addr, t.Imm:$b, t.RC:$c),
+ asm_str,
+ [(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b), t.Ty:$c))]>,
+ Requires<preds>;
+
+ def ri : NVPTXInst<(outs t.RC:$dst),
+ (ins ADDR:$addr, t.RC:$b, t.Imm:$c),
+ asm_str,
+ [(set t.Ty:$dst, (op addr:$addr, t.Ty:$b, (t.Ty t.ImmNode:$c)))]>,
+ Requires<preds>;
+
+ def ii : NVPTXInst<(outs t.RC:$dst),
+ (ins ADDR:$addr, t.Imm:$b, t.Imm:$c),
+ asm_str,
+ [(set t.Ty:$dst, (op addr:$addr, (t.Ty t.ImmNode:$b), (t.Ty t.ImmNode:$c)))]>,
+ Requires<preds>;
}
}
+multiclass F_ATOMIC_2_AS<RegTyInfo t, SDPatternOperator frag, string op_str, list<Predicate> preds = []> {
+ defvar frag_pat = (frag node:$a, node:$b);
+ defm _G : F_ATOMIC_2<t, "", ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
+ defm _S : F_ATOMIC_2<t, "", ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
+ defm _GEN : F_ATOMIC_2<t, "", "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
+}
+
+multiclass F_ATOMIC_3_AS<RegTyInfo t, SDPatternOperator frag, string sem_str, string op_str, list<Predicate> preds = []> {
+ defvar frag_pat = (frag node:$a, node:$b, node:$c);
+ defm _G : F_ATOMIC_3<t, sem_str, ".global", op_str, ATOMIC_GLOBAL_CHK<frag_pat>, preds>;
+ defm _S : F_ATOMIC_3<t, sem_str, ".shared", op_str, ATOMIC_SHARED_CHK<frag_pat>, preds>;
+ defm _GEN : F_ATOMIC_3<t, sem_str, "", op_str, ATOMIC_GENERIC_CHK<frag_pat>, preds>;
+}
+
// atom_add
+defm INT_PTX_ATOM_ADD_32 : F_ATOMIC_2_AS<I32RT, atomic_load_add_i32, "add.u32">;
+defm INT_PTX_ATOM_ADD_64 : F_ATOMIC_2_AS<I64RT, atomic_load_add_i64, "add.u64">;
-def atomic_load_add_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_add_i32 node:$a, node:$b)>;
-def atomic_load_add_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_add_i32 node:$a, node:$b)>;
-def atomic_load_add_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_add_i32 node:$a, node:$b)>;
-def atomic_load_add_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_add_i64 node:$a, node:$b)>;
-def atomic_load_add_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_add_i64 node:$a, node:$b)>;
-def atomic_load_add_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_add_i64 node:$a, node:$b)>;
-def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_fadd node:$a, node:$b)>;
-def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_fadd node:$a, node:$b)>;
-def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_fadd node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
- atomic_load_add_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
- atomic_load_add_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
- atomic_load_add_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
- ".add", atomic_load_add_i32_gen, i32imm, imm>;
-
-defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
- atomic_load_add_i64_g, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
- atomic_load_add_i64_s, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
- atomic_load_add_i64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
- ".add", atomic_load_add_i64_gen, i64imm, imm>;
-
-defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
- atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
-defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
- atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
-defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
- atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
-
-defm INT_PTX_ATOM_ADD_G_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".global", ".bf16", ".add.noftz",
- atomic_load_add_g, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
-defm INT_PTX_ATOM_ADD_S_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".shared", ".bf16", ".add.noftz",
- atomic_load_add_s, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
-defm INT_PTX_ATOM_ADD_GEN_BF16 : F_ATOMIC_2<bf16, Int16Regs, "", ".bf16", ".add.noftz",
- atomic_load_add_gen, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
-
-defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
- atomic_load_add_g, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
- atomic_load_add_s, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
- atomic_load_add_gen, f32imm, fpimm>;
-
-defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
- atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
-defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
- atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
-defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
- atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
-
-// atom_sub
-
-def atomic_load_sub_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_sub_i32 node:$a, node:$b)>;
-def atomic_load_sub_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_sub_i32 node:$a, node:$b)>;
-def atomic_load_sub_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_sub_i32 node:$a, node:$b)>;
-def atomic_load_sub_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_sub_i64 node:$a, node:$b)>;
-def atomic_load_sub_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_sub_i64 node:$a, node:$b)>;
-def atomic_load_sub_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_sub_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
- atomic_load_sub_i32_g>;
-defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
- atomic_load_sub_i64_g>;
-defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
- atomic_load_sub_i32_gen>;
-defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
- ".add", atomic_load_sub_i32_gen>;
-defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
- atomic_load_sub_i32_s>;
-defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
- atomic_load_sub_i64_s>;
-defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
- atomic_load_sub_i64_gen>;
-defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
- ".add", atomic_load_sub_i64_gen>;
+defm INT_PTX_ATOM_ADD_F16 : F_ATOMIC_2_AS<F16RT, atomic_load_fadd, "add.noftz.f16", [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_ADD_BF16 : F_ATOMIC_2_AS<BF16RT, atomic_load_fadd, "add.noftz.bf16", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_ATOM_ADD_F32 : F_ATOMIC_2_AS<F32RT, atomic_load_fadd, "add.f32">;
+defm INT_PTX_ATOM_ADD_F64 : F_ATOMIC_2_AS<F64RT, atomic_load_fadd, "add.f64", [hasAtomAddF64]>;
// atom_swap
-
-def atomic_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_swap_i32 node:$a, node:$b)>;
-def atomic_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_swap_i32 node:$a, node:$b)>;
-def atomic_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_swap_i32 node:$a, node:$b)>;
-def atomic_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_swap_i64 node:$a, node:$b)>;
-def atomic_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_swap_i64 node:$a, node:$b)>;
-def atomic_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_swap_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
- atomic_swap_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
- atomic_swap_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
- atomic_swap_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
- ".exch", atomic_swap_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
- atomic_swap_i64_g, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
- atomic_swap_i64_s, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
- atomic_swap_i64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
- ".exch", atomic_swap_i64_gen, i64imm, imm>;
+defm INT_PTX_ATOM_SWAP_32 : F_ATOMIC_2_AS<I32RT, atomic_swap_i32, "exch.b32">;
+defm INT_PTX_ATOM_SWAP_64 : F_ATOMIC_2_AS<I64RT, atomic_swap_i64, "exch.b64">;
// atom_max
-
-def atomic_load_max_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
- , (atomic_load_max_i32 node:$a, node:$b)>;
-def atomic_load_max_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_max_i32 node:$a, node:$b)>;
-def atomic_load_max_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_max_i32 node:$a, node:$b)>;
-def atomic_load_max_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
- , (atomic_load_max_i64 node:$a, node:$b)>;
-def atomic_load_max_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_max_i64 node:$a, node:$b)>;
-def atomic_load_max_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_max_i64 node:$a, node:$b)>;
-def atomic_load_umax_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_umax_i32 node:$a, node:$b)>;
-def atomic_load_umax_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_umax_i32 node:$a, node:$b)>;
-def atomic_load_umax_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_umax_i32 node:$a, node:$b)>;
-def atomic_load_umax_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_umax_i64 node:$a, node:$b)>;
-def atomic_load_umax_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_umax_i64 node:$a, node:$b)>;
-def atomic_load_umax_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_umax_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
- ".max", atomic_load_max_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
- ".max", atomic_load_max_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
- atomic_load_max_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
- ".s32", ".max", atomic_load_max_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
- ".max", atomic_load_max_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
- ".max", atomic_load_max_i64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
- atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
- ".s64", ".max", atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
- ".max", atomic_load_umax_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
- ".max", atomic_load_umax_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
- atomic_load_umax_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
- ".u32", ".max", atomic_load_umax_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
- ".max", atomic_load_umax_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
- ".max", atomic_load_umax_i64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
- atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
- ".u64", ".max", atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
+defm INT_PTX_ATOMIC_MAX_32 : F_ATOMIC_2_AS<I32RT, atomic_load_max_i32, "max.s32">;
+defm INT_PTX_ATOMIC_MAX_64 : F_ATOMIC_2_AS<I64RT, atomic_load_max_i64, "max.s64", [hasSM<32>]>;
+defm INT_PTX_ATOMIC_UMAX_32 : F_ATOMIC_2_AS<I32RT, atomic_load_umax_i32, "max.u32">;
+defm INT_PTX_ATOMIC_UMAX_64 : F_ATOMIC_2_AS<I64RT, atomic_load_umax_i64, "max.u64", [hasSM<32>]>;
// atom_min
-
-def atomic_load_min_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_min_i32 node:$a, node:$b)>;
-def atomic_load_min_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_min_i32 node:$a, node:$b)>;
-def atomic_load_min_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_min_i32 node:$a, node:$b)>;
-def atomic_load_min_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_min_i64 node:$a, node:$b)>;
-def atomic_load_min_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_min_i64 node:$a, node:$b)>;
-def atomic_load_min_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_min_i64 node:$a, node:$b)>;
-def atomic_load_umin_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_umin_i32 node:$a, node:$b)>;
-def atomic_load_umin_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_umin_i32 node:$a, node:$b)>;
-def atomic_load_umin_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_umin_i32 node:$a, node:$b)>;
-def atomic_load_umin_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_umin_i64 node:$a, node:$b)>;
-def atomic_load_umin_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_umin_i64 node:$a, node:$b)>;
-def atomic_load_umin_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_umin_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
- ".min", atomic_load_min_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
- ".min", atomic_load_min_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
- atomic_load_min_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
- ".s32", ".min", atomic_load_min_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
- ".min", atomic_load_min_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
- ".min", atomic_load_min_i64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
- atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
- ".s64", ".min", atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
- ".min", atomic_load_umin_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
- ".min", atomic_load_umin_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
- atomic_load_umin_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
- ".u32", ".min", atomic_load_umin_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
- ".min", atomic_load_umin_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
- ".min", atomic_load_umin_i64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
- atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
- ".u64", ".min", atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
+defm INT_PTX_ATOMIC_MIN_32 : F_ATOMIC_2_AS<I32RT, atomic_load_min_i32, "min.s32">;
+defm INT_PTX_ATOMIC_MIN_64 : F_ATOMIC_2_AS<I64RT, atomic_load_min_i64, "min.s64", [hasSM<32>]>;
+defm INT_PTX_ATOMIC_UMIN_32 : F_ATOMIC_2_AS<I32RT, atomic_load_umin_i32, "min.u32">;
+defm INT_PTX_ATOMIC_UMIN_64 : F_ATOMIC_2_AS<I64RT, atomic_load_umin_i64, "min.u64", [hasSM<32>]>;
// atom_inc atom_dec
-
-def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
-def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
-def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
-def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
-def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
-def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
- atomic_load_inc_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
- atomic_load_inc_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
- atomic_load_inc_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
- ".inc", atomic_load_inc_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
- atomic_load_dec_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
- atomic_load_dec_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
- atomic_load_dec_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
- ".dec", atomic_load_dec_32_gen, i32imm, imm>;
+defm INT_PTX_ATOM_INC_32 : F_ATOMIC_2_AS<I32RT, int_nvvm_atomic_load_inc_32, "inc.u32">;
+defm INT_PTX_ATOM_DEC_32 : F_ATOMIC_2_AS<I32RT, int_nvvm_atomic_load_dec_32, "dec.u32">;
// atom_and
-
-def atomic_load_and_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_and_i32 node:$a, node:$b)>;
-def atomic_load_and_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_and_i32 node:$a, node:$b)>;
-def atomic_load_and_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_and_i32 node:$a, node:$b)>;
-def atomic_load_and_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_and_i64 node:$a, node:$b)>;
-def atomic_load_and_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_and_i64 node:$a, node:$b)>;
-def atomic_load_and_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_and_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
- atomic_load_and_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
- atomic_load_and_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
- atomic_load_and_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
- ".and", atomic_load_and_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
- atomic_load_and_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
- atomic_load_and_i64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
- atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
- ".and", atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
+defm INT_PTX_ATOM_AND_32 : F_ATOMIC_2_AS<I32RT, atomic_load_and_i32, "and.b32">;
+defm INT_PTX_ATOM_AND_64 : F_ATOMIC_2_AS<I64RT, atomic_load_and_i64, "and.b64", [hasSM<32>]>;
// atom_or
-
-def atomic_load_or_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_or_i32 node:$a, node:$b)>;
-def atomic_load_or_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_or_i32 node:$a, node:$b)>;
-def atomic_load_or_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_or_i32 node:$a, node:$b)>;
-def atomic_load_or_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_or_i64 node:$a, node:$b)>;
-def atomic_load_or_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_or_i64 node:$a, node:$b)>;
-def atomic_load_or_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_or_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
- atomic_load_or_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
- atomic_load_or_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
- ".or", atomic_load_or_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
- atomic_load_or_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
- atomic_load_or_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
- atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
- ".or", atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
- atomic_load_or_i64_s, i64imm, imm, [hasSM<32>]>;
+defm INT_PTX_ATOM_OR_32 : F_ATOMIC_2_AS<I32RT, atomic_load_or_i32, "or.b32">;
+defm INT_PTX_ATOM_OR_64 : F_ATOMIC_2_AS<I64RT, atomic_load_or_i64, "or.b64", [hasSM<32>]>;
// atom_xor
+defm INT_PTX_ATOM_XOR_32 : F_ATOMIC_2_AS<I32RT, atomic_load_xor_i32, "xor.b32">;
+defm INT_PTX_ATOM_XOR_64 : F_ATOMIC_2_AS<I64RT, atomic_load_xor_i64, "xor.b64", [hasSM<32>]>;
-def atomic_load_xor_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_xor_i32 node:$a, node:$b)>;
-def atomic_load_xor_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_xor_i32 node:$a, node:$b)>;
-def atomic_load_xor_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_xor_i32 node:$a, node:$b)>;
-def atomic_load_xor_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (atomic_load_xor_i64 node:$a, node:$b)>;
-def atomic_load_xor_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (atomic_load_xor_i64 node:$a, node:$b)>;
-def atomic_load_xor_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (atomic_load_xor_i64 node:$a, node:$b)>;
-
-defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
- atomic_load_xor_i32_g, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
- atomic_load_xor_i32_s, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
- atomic_load_xor_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
- ".xor", atomic_load_xor_i32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
- atomic_load_xor_i64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
- atomic_load_xor_i64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
- atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
- ".xor", atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
-
-multiclass ternary_atomic_op_as {
- // one record per address space
- def NAME#_generic: PatFrag<(ops node:$ptr, node:$cmp, node:$val),
- (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val),
- AS_match.generic>;
-
- def NAME#_global: PatFrag<(ops node:$ptr, node:$cmp, node:$val),
- (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val),
- AS_match.global>;
-
- def NAME#_shared: PatFrag<(ops node:$ptr, node:$cmp, node:$val),
- (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val),
- AS_match.shared>;
-}
-
-// generate pattern fragments for size x memory order
-// NOTE: i8 cmpxchg is not supported in ptx, and AtomicExpandPass will emulate all i8 cmpxchgs
-// using larger-bitwidth cas
-foreach size = ["i16", "i32", "i64"] in {
- foreach order = ["", "_monotonic", "_acquire", "_release", "_acq_rel", "_seq_cst"] in {
- defm atomic_cmp_swap#_#size#order: ternary_atomic_op_as;
- }
-}
-
-// eg. with type = 32, order = ".acquire", addrspace = ".global",
-// atomic_cmp_swap_pat = atomic_cmp_swap_i32_acquire_global.
-// preds = [hasSM<70>, hasPTX<63>]
-// F_ATOMIC_3<i32, Int32Regs, ".acquire", ".global", ".b32",
-// ".cas", atomic_cmp_swap_i32_acquire_global, i32imm,
-// [hasSM<70>, hasPTX<63>]>
-multiclass INT_PTX_ATOM_CAS<string atomic_cmp_swap_pat, string type,
- string order, string addrspace, list<Predicate> preds>
- : F_ATOMIC_3<!cast<ValueType>("i"#type),
- !cast<NVPTXRegClass>("Int"#type#"Regs"),
- order,
- addrspace,
- ".b"#type,
- ".cas",
- !cast<PatFrag>(atomic_cmp_swap_pat),
- !cast<Operand>("i"#type#"imm"),
- preds>;
// Define atom.cas for all combinations of size x addrspace x memory order
// supported in PTX *and* on the hardware.
-foreach size = ["32", "64"] in {
- foreach addrspace = ["generic", "global", "shared"] in {
- defvar cas_addrspace_string = !if(!eq(addrspace, "generic"), "", "."#addrspace);
- foreach order = ["acquire", "release", "acq_rel", "monotonic"] in {
- defvar cas_order_string = !if(!eq(order, "monotonic"), ".relaxed", "."#order);
- // Note that AtomicExpand will convert cmpxchg seq_cst to a cmpxchg monotonic with fences around it.
- // Memory orders are only supported for SM70+, PTX63+- so we have two sets of instruction definitions-
- // for SM70+, and "old" ones which lower to "atom.cas", for earlier archs.
- defm INT_PTX_ATOM_CAS_#size#_#order#addrspace
- : INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace, size,
- cas_order_string, cas_addrspace_string,
- [hasSM<70>, hasPTX<63>]>;
- defm INT_PTX_ATOM_CAS_#size#_#order#_old#addrspace
- : INT_PTX_ATOM_CAS<"atomic_cmp_swap_i"#size#_#order#_#addrspace, size,
- "", cas_addrspace_string, []>;
- }
+foreach t = [I32RT, I64RT] in {
+ foreach order = ["acquire", "release", "acq_rel", "monotonic"] in {
+ defvar cas_order_string = !if(!eq(order, "monotonic"), ".relaxed", "."#order);
+ defvar atomic_cmp_swap_pat = !cast<PatFrag>("atomic_cmp_swap_i"#t.Size#_#order);
+ // Note that AtomicExpand will convert cmpxchg seq_cst to a cmpxchg monotonic with fences around it.
+    // Memory orders are only supported on SM70+ / PTX 6.3+, so we have two sets of
+    // instruction definitions: one for SM70+, and "old" ones which lower to
+    // "atom.cas" for earlier architectures.
+ defm INT_PTX_ATOM_CAS_#t.Size#_#order
+ : F_ATOMIC_3_AS<t, atomic_cmp_swap_pat, cas_order_string, "cas.b"#t.Size, [hasSM<70>, hasPTX<63>]>;
+ defm INT_PTX_ATOM_CAS_#t.Size#_#order#_old
+ : F_ATOMIC_3_AS<t, atomic_cmp_swap_pat, "", "cas.b"#t.Size, []>;
}
}
// Note that 16-bit CAS support in PTX is emulated.
-defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, "", ".global", ".b16", ".cas",
- atomic_cmp_swap_i16_global, i16imm, [hasSM<70>, hasPTX<63>]>;
-defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, "", ".shared", ".b16", ".cas",
- atomic_cmp_swap_i16_shared, i16imm, [hasSM<70>, hasPTX<63>]>;
-defm INT_PTX_ATOM_CAS_GEN_16 : F_ATOMIC_3<i16, Int16Regs, "", "", ".b16", ".cas",
- atomic_cmp_swap_i16_generic, i16imm, [hasSM<70>, hasPTX<63>]>;
+defm INT_PTX_ATOM_CAS_16 : F_ATOMIC_3_AS<I16RT, atomic_cmp_swap_i16, "", "cas.b16", [hasSM<70>, hasPTX<63>]>;
// Support for scoped atomic operations. Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
// 'space' is limited to generic as it's the only one needed to support CUDA.
// 'scope' = 'gpu' is default and is handled by regular atomic instructions.
-class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
- dag ins, dag Operands>
- : NVPTXInst<(outs regclass:$result), ins,
- AsmStr,
- [(set regT:$result, Operands)]>,
- Requires<Preds>;
// Define instruction variants for all addressing modes.
-multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
- ValueType regT, NVPTXRegClass regclass, Operand ImmType,
- SDNode Imm, ValueType ImmTy,
- list<Predicate> Preds> {
- let AddedComplexity = 1 in {
- def : ATOM23_impl<AsmStr, regT, regclass, Preds,
- (ins ADDR:$src, regclass:$b),
- (Intr addr:$src, regT:$b)>;
- }
- // tablegen can't infer argument types from Intrinsic (though it can
- // from Instruction) so we have to enforce specific type on
- // immediates via explicit cast to ImmTy.
- def : ATOM23_impl<AsmStr, regT, regclass, Preds,
- (ins ADDR:$src, ImmType:$b),
- (Intr addr:$src, (ImmTy Imm:$b))>;
-}
-
-multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
- ValueType regT, NVPTXRegClass regclass,
- Operand ImmType, SDNode Imm, ValueType ImmTy,
- list<Predicate> Preds> {
- // Variants for register/immediate permutations of $b and $c
- let AddedComplexity = 2 in {
- def : ATOM23_impl<AsmStr, regT, regclass, Preds,
- (ins ADDR:$src, regclass:$b, regclass:$c),
- (Intr addr:$src, regT:$b, regT:$c)>;
- }
- let AddedComplexity = 1 in {
- def : ATOM23_impl<AsmStr, regT, regclass, Preds,
- (ins ADDR:$src, ImmType:$b, regclass:$c),
- (Intr addr:$src, (ImmTy Imm:$b), regT:$c)>;
- def : ATOM23_impl<AsmStr, regT, regclass, Preds,
- (ins ADDR:$src, regclass:$b, ImmType:$c),
- (Intr addr:$src, regT:$b, (ImmTy Imm:$c))>;
- }
- def : ATOM23_impl<AsmStr, regT, regclass, Preds,
- (ins ADDR:$src, ImmType:$b, ImmType:$c),
- (Intr addr:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
-}
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
- ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
- ValueType ImmTy, list<Predicate> Preds> {
- defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
- # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
- # "." # OpStr # "." # TypeStr
- # " \t$result, [$src], $b;",
+ RegTyInfo t, list<Predicate> Preds> {
+ defm "" : F_ATOMIC_2<t, !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr), !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr),
+ OpStr # "." # TypeStr,
----------------
Artem-B wrote:
This could use some re-formatting. I'd put each argument on a separate line, possibly along with a comment stating argument name, if it's not obvious from the value.
https://github.com/llvm/llvm-project/pull/133781
More information about the llvm-commits
mailing list