[llvm] 24c5f18 - [NVPTX][NFC] Explicitly specify the matching type for Int32reg (#65527)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 6 13:50:26 PDT 2023
Author: Thomas
Date: 2023-09-06T13:50:21-07:00
New Revision: 24c5f18cf5899d267bdc478295ab56925bbb4b04
URL: https://github.com/llvm/llvm-project/commit/24c5f18cf5899d267bdc478295ab56925bbb4b04
DIFF: https://github.com/llvm/llvm-project/commit/24c5f18cf5899d267bdc478295ab56925bbb4b04.diff
LOG: [NVPTX][NFC] Explicitly specify the matching type for Int32reg (#65527)
NFC changes to explicitly specify the type we are matching when creating
Int32 reg. This will allow us to have multiple types mapping to those
registers without causing ambiguous matching.
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b98f76ed4b38d95..4d4dcca2f53e665 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -199,11 +199,11 @@ multiclass I3<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
def i16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
@@ -221,11 +221,11 @@ multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
@@ -811,14 +811,14 @@ defm UREM : I3<"rem.u", urem>;
// Integer absolute value. NumBits should be one minus the bit width of RC.
// This idiom implements the algorithm at
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerAbs.
-multiclass ABS<RegisterClass RC, string SizeName> {
+multiclass ABS<ValueType T, RegisterClass RC, string SizeName> {
def : NVPTXInst<(outs RC:$dst), (ins RC:$a),
!strconcat("abs", SizeName, " \t$dst, $a;"),
- [(set RC:$dst, (abs RC:$a))]>;
+ [(set (T RC:$dst), (abs (T RC:$a)))]>;
}
-defm ABS_16 : ABS<Int16Regs, ".s16">;
-defm ABS_32 : ABS<Int32Regs, ".s32">;
-defm ABS_64 : ABS<Int64Regs, ".s64">;
+defm ABS_16 : ABS<i16, Int16Regs, ".s16">;
+defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
+defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
// Integer min/max.
defm SMAX : I3<"max.s", smax>;
@@ -890,13 +890,13 @@ def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
+def : Pat<(i64 (mul_wide_signed (i32 Int32Regs:$a), imm:$b)),
(MULWIDES64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
(MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
+def : Pat<(i64 (mul_wide_unsigned (i32 Int32Regs:$a), imm:$b)),
(MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
@@ -1022,22 +1022,22 @@ def MAD32rrr :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), (i32 Int32Regs:$c)))]>;
def MAD32rri :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), (i32 Int32Regs:$b), imm:$c))]>;
def MAD32rir :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, (i32 Int32Regs:$c)))]>;
def MAD32rii :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]>;
+ [(set (i32 Int32Regs:$dst), (imad (i32 Int32Regs:$a), imm:$b, imm:$c))]>;
def MAD64rrr :
NVPTXInst<(outs Int64Regs:$dst),
@@ -1067,7 +1067,7 @@ def INEG16 :
def INEG32 :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"neg.s32 \t$dst, $src;",
- [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+ [(set (i32 Int32Regs:$dst), (ineg (i32 Int32Regs:$src)))]>;
def INEG64 :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"neg.s64 \t$dst, $src;",
@@ -1458,11 +1458,11 @@ multiclass BITWISE<string OpcStr, SDNode OpNode> {
def b32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def b32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), imm:$b))]>;
def b64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
@@ -1485,7 +1485,7 @@ def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
[(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"not.b32 \t$dst, $src;",
- [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
+ [(set (i32 Int32Regs:$dst), (not (i32 Int32Regs:$src)))]>;
def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"not.b64 \t$dst, $src;",
[(set Int64Regs:$dst, (not Int64Regs:$src))]>;
@@ -1499,7 +1499,7 @@ multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
- [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
+ [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 Int32Regs:$b)))]>;
def i64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
@@ -1507,11 +1507,11 @@ multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 Int32Regs:$b)))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
- [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
+ [(set Int32Regs:$dst, (OpNode (i32 Int32Regs:$a), (i32 imm:$b)))]>;
def i32ii :
NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
@@ -1519,7 +1519,7 @@ multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
- [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
+ [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 Int32Regs:$b)))]>;
def i16ri :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
@@ -1534,7 +1534,7 @@ defm SRL : SHIFT<"shr.u", srl>;
def BREV32 :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
"brev.b32 \t$dst, $a;",
- [(set Int32Regs:$dst, (bitreverse Int32Regs:$a))]>;
+ [(set Int32Regs:$dst, (bitreverse (i32 Int32Regs:$a)))]>;
def BREV64 :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
"brev.b64 \t$dst, $a;",
@@ -1550,13 +1550,13 @@ def BREV64 :
def ROTL32imm_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
"shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
+ [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
def ROTL32reg_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
"shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[hasHWROT32]>;
// 32 bit r2 = rotr r1, n
@@ -1565,13 +1565,13 @@ def ROTL32reg_hw :
def ROTR32imm_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
"shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
+ [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
def ROTR32reg_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
"shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[hasHWROT32]>;
// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1.
@@ -1591,10 +1591,10 @@ def SUB_FRM_32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
-def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
+def : Pat<(rotl (i32 Int32Regs:$src), (i32 imm:$amt)),
(ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
Requires<[noHWROT32]>;
-def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
+def : Pat<(rotr (i32 Int32Regs:$src), (i32 imm:$amt)),
(ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
Requires<[noHWROT32]>;
@@ -1610,7 +1610,7 @@ def ROTL32reg_sw :
"shr.b32 \t%rhs, $src, %amt2;\n\t"
"add.u32 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotl (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[noHWROT32]>;
// 32-bit software rotate right by register.
@@ -1625,7 +1625,7 @@ def ROTR32reg_sw :
"shl.b32 \t%rhs, $src, %amt2;\n\t"
"add.u32 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ [(set Int32Regs:$dst, (rotr (i32 Int32Regs:$src), (i32 Int32Regs:$amt)))]>,
Requires<[noHWROT32]>;
// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1.
@@ -1662,7 +1662,7 @@ def ROTL64reg_sw :
"shr.b64 \t%rhs, $src, %amt2;\n\t"
"add.u64 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
+ [(set Int64Regs:$dst, (rotl Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
def ROTR64reg_sw :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
@@ -1675,7 +1675,7 @@ def ROTR64reg_sw :
"shl.b64 \t%rhs, $src, %amt2;\n\t"
"add.u64 \t$dst, %lhs, %rhs;\n\t"
"}}",
- [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+ [(set Int64Regs:$dst, (rotr Int64Regs:$src, (i32 Int32Regs:$amt)))]>;
//
// Funnnel shift in clamp mode
@@ -1691,14 +1691,14 @@ def FUNSHFLCLAMP :
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
"shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
[(set Int32Regs:$dst,
- (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;
+ (FUN_SHFL_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>;
def FUNSHFRCLAMP :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
"shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
[(set Int32Regs:$dst,
- (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;
+ (FUN_SHFR_CLAMP (i32 Int32Regs:$lo), (i32 Int32Regs:$hi), (i32 Int32Regs:$amt)))]>;
//
// BFE - bit-field extract
@@ -1915,7 +1915,7 @@ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
[(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
"mov.u32 \t$dst, $src;",
- [(set Int32Regs:$dst, imm:$src)]>;
+ [(set (i32 Int32Regs:$dst), imm:$src)]>;
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.u64 \t$dst, $src;",
[(set Int64Regs:$dst, imm:$src)]>;
@@ -1978,9 +1978,9 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
// i32 -> pred
def : Pat<(i1 (OpNode i32:$a, i32:$b)),
(setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
- def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
+ def : Pat<(i1 (OpNode (i32 Int32Regs:$a), imm:$b)),
(setp_32ri Int32Regs:$a, imm:$b, Mode)>;
- def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
+ def : Pat<(i1 (OpNode imm:$a, (i32 Int32Regs:$b))),
(setp_32ir imm:$a, Int32Regs:$b, Mode)>;
// i64 -> pred
def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
@@ -2000,9 +2000,9 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
// i32 -> i32
def : Pat<(i32 (OpNode i32:$a, i32:$b)),
(set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
- def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
+ def : Pat<(i32 (OpNode (i32 Int32Regs:$a), imm:$b)),
(set_32ri Int32Regs:$a, imm:$b, Mode)>;
- def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
+ def : Pat<(i32 (OpNode imm:$a, (i32 Int32Regs:$b))),
(set_32ir imm:$a, Int32Regs:$b, Mode)>;
// i64 -> i32
def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
@@ -3207,25 +3207,25 @@ def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
// Select instructions with 32-bit predicates
-def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b),
+def : Pat<(select (i32 Int32Regs:$pred), i16:$a, i16:$b),
(SELP_b16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, i32:$a, i32:$b),
+def : Pat<(select (i32 Int32Regs:$pred), i32:$a, i32:$b),
(SELP_b32rr Int32Regs:$a, Int32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
+def : Pat<(select (i32 Int32Regs:$pred), Int64Regs:$a, Int64Regs:$b),
(SELP_b64rr Int64Regs:$a, Int64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
+def : Pat<(select (i32 Int32Regs:$pred), (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
(SELP_f16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
+def : Pat<(select (i32 Int32Regs:$pred), (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
(SELP_bf16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
+def : Pat<(select (i32 Int32Regs:$pred), Float32Regs:$a, Float32Regs:$b),
(SELP_f32rr Float32Regs:$a, Float32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
+def : Pat<(select (i32 Int32Regs:$pred), Float64Regs:$a, Float64Regs:$b),
(SELP_f64rr Float64Regs:$a, Float64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
@@ -3309,7 +3309,7 @@ let hasSideEffects = false in {
}
// 32-bit has a direct PTX instruction
-def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
+def : Pat<(i32 (ctlz (i32 Int32Regs:$a))), (CLZr32 Int32Regs:$a)>;
// The return type of the ctlz ISD node is the same as its input, but the PTX
// ctz instruction always returns a 32-bit value. For ctlz.i64, convert the
@@ -3347,7 +3347,7 @@ let hasSideEffects = false in {
}
// 32-bit has a direct PTX instruction
-def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
+def : Pat<(i32 (ctpop (i32 Int32Regs:$a))), (POPCr32 Int32Regs:$a)>;
// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit
// to match the LLVM semantics. Just as with ctlz.i64, we provide a second
@@ -3460,7 +3460,7 @@ let isTerminator=1 in {
"bra.uni \t$target;", [(br bb:$target)]>;
}
-def : Pat<(brcond Int32Regs:$a, bb:$target),
+def : Pat<(brcond (i32 Int32Regs:$a), bb:$target),
(CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index f0de0144d410e9c..85eae44f349aa37 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1460,29 +1460,31 @@ class ATOMIC_SHARED_CHK <dag ops, dag frag>
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
: PatFrag<ops, frag, AS_match.generic>;
-multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
+ ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, SDNode IMM, list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
Requires<Pred>;
def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
- [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
Requires<Pred>;
}
-multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
list<Predicate> Pred = []> {
- defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, IMM, Pred>;
- defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, IMM, Pred>;
}
// has 2 operands, neg the second one
-multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
+ ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
@@ -1492,50 +1494,51 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
"neg.s", TypeStr, " \ttemp, $b; \n\t",
"atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
"}}"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
Requires<Pred>;
}
-multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
+multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
- defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p32: F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, Pred> ;
- defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p64: F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, Pred> ;
}
// has 3 operands
-multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
+multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
+ ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, regclass:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
Requires<Pred>;
def imm1 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, regclass:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
Requires<Pred>;
def imm2 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, IMMType:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
Requires<Pred>;
def imm3 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, IMMType:$c),
!strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
Requires<Pred>;
}
-multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
+multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
- defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, Pred>;
- defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
+ defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr, OpcStr,
IntOp, IMMType, Pred>;
}
@@ -1560,36 +1563,36 @@ def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_fadd node:$a, node:$b)>;
-defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
+defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
atomic_load_add_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
+defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
atomic_load_add_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
+defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
atomic_load_add_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".add", atomic_load_add_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
+defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
atomic_load_add_64_g, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
+defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
atomic_load_add_64_s, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
+defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
atomic_load_add_64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
".add", atomic_load_add_64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
+defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
atomic_load_add_g, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
+defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
atomic_load_add_s, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
+defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
atomic_load_add_gen, f32imm, fpimm>;
-defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
+defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
-defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
+defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
-defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
+defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// atom_sub
@@ -1607,21 +1610,21 @@ def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_sub_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
+defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
atomic_load_sub_32_g>;
-defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
+defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
atomic_load_sub_64_g>;
-defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
+defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
atomic_load_sub_32_gen>;
-defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
+defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
".add", atomic_load_sub_32_gen>;
-defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
+defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
atomic_load_sub_32_s>;
-defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
+defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
atomic_load_sub_64_s>;
-defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
+defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
atomic_load_sub_64_gen>;
-defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
+defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
".add", atomic_load_sub_64_gen>;
// atom_swap
@@ -1639,21 +1642,21 @@ def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_swap_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
+defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
atomic_swap_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
+defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
atomic_swap_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
+defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
atomic_swap_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".exch", atomic_swap_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
+defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
atomic_swap_64_g, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
+defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
atomic_swap_64_s, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
+defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
atomic_swap_64_gen, i64imm, imm>;
-defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".exch", atomic_swap_64_gen, i64imm, imm>;
// atom_max
@@ -1683,37 +1686,37 @@ def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umax_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
".max", atomic_load_max_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
".max", atomic_load_max_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
atomic_load_max_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
".max", atomic_load_umax_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
atomic_load_umax_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_min
@@ -1743,37 +1746,37 @@ def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umin_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
+defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
".min", atomic_load_min_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
+defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
".min", atomic_load_min_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
atomic_load_min_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
+defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
".min", atomic_load_umin_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
atomic_load_umin_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_inc atom_dec
@@ -1791,21 +1794,21 @@ def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
-defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
+defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
atomic_load_inc_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
+defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
atomic_load_inc_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
+defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
atomic_load_inc_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".inc", atomic_load_inc_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
+defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
atomic_load_dec_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
+defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
atomic_load_dec_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
+defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
atomic_load_dec_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
+defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
".dec", atomic_load_dec_32_gen, i32imm, imm>;
// atom_and
@@ -1823,21 +1826,21 @@ def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_and_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
+defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
atomic_load_and_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
+defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
atomic_load_and_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
+defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
atomic_load_and_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
+defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
+defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
+defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_or
@@ -1855,21 +1858,21 @@ def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_or_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
+defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
atomic_load_or_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
+defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
atomic_load_or_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".or", atomic_load_or_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
+defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
+defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
+defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
+defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
// atom_xor
@@ -1887,21 +1890,21 @@ def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_xor_64 node:$a, node:$b)>;
-defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
+defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
atomic_load_xor_32_g, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
+defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
atomic_load_xor_32_s, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
+defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
atomic_load_xor_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm>;
-defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
+defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
+defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
+defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
-defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_cas
@@ -1919,21 +1922,21 @@ def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
(atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
-defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
+defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
atomic_cmp_swap_32_g, i32imm>;
-defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
+defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
atomic_cmp_swap_32_s, i32imm>;
-defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
+defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
atomic_cmp_swap_32_gen, i32imm>;
-defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
+defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
".cas", atomic_cmp_swap_32_gen, i32imm>;
-defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
+defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
atomic_cmp_swap_64_g, i64imm>;
-defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
+defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
atomic_cmp_swap_64_s, i64imm>;
-defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
+defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
atomic_cmp_swap_64_gen, i64imm>;
-defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
+defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
".cas", atomic_cmp_swap_64_gen, i64imm>;
// Support for scoped atomic operations. Matches
@@ -1942,76 +1945,76 @@ defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
// NOTE: not all possible combinations are implemented
// 'space' is limited to generic as it's the only one needed to support CUDA.
// 'scope' = 'gpu' is default and is handled by regular atomic instructions.
-class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
+class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
dag ins, dag Operands>
: NVPTXInst<(outs regclass:$result), ins,
AsmStr,
- [(set regclass:$result, Operands)]>,
+ [(set (regT regclass:$result), Operands)]>,
Requires<Preds>;
// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
- NVPTXRegClass regclass, Operand ImmType,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType,
SDNode Imm, ValueType ImmTy,
list<Predicate> Preds> {
let AddedComplexity = 1 in {
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b),
- (Intr Int32Regs:$src, regclass:$b)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b),
- (Intr Int64Regs:$src, regclass:$b)>;
+ (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
}
// tablegen can't infer argument types from Intrinsic (though it can
// from Instruction) so we have to enforce specific type on
// immediates via explicit cast to ImmTy.
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b),
- (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b),
- (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
+ (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
- NVPTXRegClass regclass, Operand ImmType,
- SDNode Imm, ValueType ImmTy,
+ ValueType regT, NVPTXRegClass regclass,
+ Operand ImmType, SDNode Imm, ValueType ImmTy,
list<Predicate> Preds> {
// Variants for register/immediate permutations of $b and $c
let AddedComplexity = 2 in {
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b, regclass:$c),
- (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b, regclass:$c),
- (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
+ (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
}
let AddedComplexity = 1 in {
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b, regclass:$c),
- (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b, regclass:$c),
- (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, regclass:$b, ImmType:$c),
- (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, regclass:$b, ImmType:$c),
- (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
+ (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
}
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int32Regs:$src, ImmType:$b, ImmType:$c),
- (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
- def : ATOM23_impl<AsmStr, regclass, Preds,
+ (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ def : ATOM23_impl<AsmStr, regT, regclass, Preds,
(ins Int64Regs:$src, ImmType:$b, ImmType:$c),
- (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
ValueType ImmTy, list<Predicate> Preds> {
defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
# !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
@@ -2021,11 +2024,11 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
string ScopeStr, string SpaceStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
ValueType ImmTy, list<Predicate> Preds> {
defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
# !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
@@ -2035,93 +2038,93 @@ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
# !if(!empty(ScopeStr), "", "_" # ScopeStr)),
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
- string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+ string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
- string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+ string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
- regclass, ImmType, Imm, ImmTy, Preds>;
+ regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
ValueType ImmTy, list<Predicate> Preds> {
// .gpu scope is default and is currently covered by existing
// atomics w/o explicitly specified scope.
defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
- NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
+ ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
list<Predicate> Preds> {
// No need to define ".gpu"-scoped atomics. They do the same thing
// as the regular, non-scoped atomics defined elsewhere.
defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
- regclass, ImmType, Imm, ImmTy,
+ regT, regclass, ImmType, Imm, ImmTy,
!listconcat(Preds,[hasAtomScope])>;
}
// atom.add
multiclass ATOM2_add_impl<string OpStr> {
- defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
- defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
- defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
+ defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
+ defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
[]>;
- defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
+ defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
[hasAtomAddF64]>;
}
// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
- defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
- defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
+ defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
[hasAtomBitwise64]>;
}
// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
- defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
- defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+ defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
- defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
- defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
+ defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
[hasAtomMinMax64]>;
- defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
[hasAtomMinMax64]>;
}
// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
- defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
}
// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
- defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
- defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+ defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
More information about the llvm-commits
mailing list