[llvm] 7ce76e1 - [NVPTX] Rename register classes after float register removal (NFC) (#145255)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 10:53:40 PDT 2025
Author: Alex MacLean
Date: 2025-06-23T10:53:36-07:00
New Revision: 7ce76e1ad12408aa77dce45196f241a0eb601963
URL: https://github.com/llvm/llvm-project/commit/7ce76e1ad12408aa77dce45196f241a0eb601963
DIFF: https://github.com/llvm/llvm-project/commit/7ce76e1ad12408aa77dce45196f241a0eb601963.diff
LOG: [NVPTX] Rename register classes after float register removal (NFC) (#145255)
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
llvm/test/CodeGen/NVPTX/branch-fold.mir
llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
llvm/test/DebugInfo/NVPTX/debug-bool-var.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9af6fb2cb198e..38912a7f09e30 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -215,15 +215,15 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
// Encode the register class in the upper 4 bits
// Must be kept in sync with NVPTXInstPrinter::printRegName
unsigned Ret = 0;
- if (RC == &NVPTX::Int1RegsRegClass) {
+ if (RC == &NVPTX::B1RegClass) {
Ret = (1 << 28);
- } else if (RC == &NVPTX::Int16RegsRegClass) {
+ } else if (RC == &NVPTX::B16RegClass) {
Ret = (2 << 28);
- } else if (RC == &NVPTX::Int32RegsRegClass) {
+ } else if (RC == &NVPTX::B32RegClass) {
Ret = (3 << 28);
- } else if (RC == &NVPTX::Int64RegsRegClass) {
+ } else if (RC == &NVPTX::B64RegClass) {
Ret = (4 << 28);
- } else if (RC == &NVPTX::Int128RegsRegClass) {
+ } else if (RC == &NVPTX::B128RegClass) {
Ret = (7 << 28);
} else {
report_fatal_error("Bad register class");
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 492f4ab76fdbb..676654d6d33e7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -589,18 +589,18 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(Op, VT, IsOpSupported ? Action : NoI16x2Action);
};
- addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
- addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
- addRegisterClass(MVT::v2i16, &NVPTX::Int32RegsRegClass);
- addRegisterClass(MVT::v4i8, &NVPTX::Int32RegsRegClass);
- addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
- addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
- addRegisterClass(MVT::f32, &NVPTX::Int32RegsRegClass);
- addRegisterClass(MVT::f64, &NVPTX::Int64RegsRegClass);
- addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
- addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
- addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
- addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::i1, &NVPTX::B1RegClass);
+ addRegisterClass(MVT::i16, &NVPTX::B16RegClass);
+ addRegisterClass(MVT::v2i16, &NVPTX::B32RegClass);
+ addRegisterClass(MVT::v4i8, &NVPTX::B32RegClass);
+ addRegisterClass(MVT::i32, &NVPTX::B32RegClass);
+ addRegisterClass(MVT::i64, &NVPTX::B64RegClass);
+ addRegisterClass(MVT::f32, &NVPTX::B32RegClass);
+ addRegisterClass(MVT::f64, &NVPTX::B64RegClass);
+ addRegisterClass(MVT::f16, &NVPTX::B16RegClass);
+ addRegisterClass(MVT::v2f16, &NVPTX::B32RegClass);
+ addRegisterClass(MVT::bf16, &NVPTX::B16RegClass);
+ addRegisterClass(MVT::v2bf16, &NVPTX::B32RegClass);
// Conversion to/from FP16/FP16x2 is always legal.
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
@@ -4866,22 +4866,22 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'b':
- return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
+ return std::make_pair(0U, &NVPTX::B1RegClass);
case 'c':
case 'h':
- return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
+ return std::make_pair(0U, &NVPTX::B16RegClass);
case 'r':
case 'f':
- return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
+ return std::make_pair(0U, &NVPTX::B32RegClass);
case 'l':
case 'N':
case 'd':
- return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
+ return std::make_pair(0U, &NVPTX::B64RegClass);
case 'q': {
if (STI.getSmVersion() < 70)
report_fatal_error("Inline asm with 128 bit operands is only "
"supported for sm_70 and higher!");
- return std::make_pair(0U, &NVPTX::Int128RegsRegClass);
+ return std::make_pair(0U, &NVPTX::B128RegClass);
}
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index f262a0fb66c25..bf84d1dca4ed5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -39,15 +39,15 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
report_fatal_error("Copy one register into another with a
diff erent width");
unsigned Op;
- if (DestRC == &NVPTX::Int1RegsRegClass) {
+ if (DestRC == &NVPTX::B1RegClass) {
Op = NVPTX::IMOV1r;
- } else if (DestRC == &NVPTX::Int16RegsRegClass) {
+ } else if (DestRC == &NVPTX::B16RegClass) {
Op = NVPTX::MOV16r;
- } else if (DestRC == &NVPTX::Int32RegsRegClass) {
+ } else if (DestRC == &NVPTX::B32RegClass) {
Op = NVPTX::IMOV32r;
- } else if (DestRC == &NVPTX::Int64RegsRegClass) {
+ } else if (DestRC == &NVPTX::B64RegClass) {
Op = NVPTX::IMOV64r;
- } else if (DestRC == &NVPTX::Int128RegsRegClass) {
+ } else if (DestRC == &NVPTX::B128RegClass) {
Op = NVPTX::IMOV128r;
} else {
llvm_unreachable("Bad register copy");
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index bbe99dec5c445..5979054764647 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -170,29 +170,6 @@ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">;
-// Helper class to aid conversion between ValueType and a matching RegisterClass.
-
-class ValueToRegClass<ValueType T> {
- string name = !cast<string>(T);
- NVPTXRegClass ret = !cond(
- !eq(name, "i1"): Int1Regs,
- !eq(name, "i16"): Int16Regs,
- !eq(name, "v2i16"): Int32Regs,
- !eq(name, "i32"): Int32Regs,
- !eq(name, "i64"): Int64Regs,
- !eq(name, "f16"): Int16Regs,
- !eq(name, "v2f16"): Int32Regs,
- !eq(name, "bf16"): Int16Regs,
- !eq(name, "v2bf16"): Int32Regs,
- !eq(name, "f32"): Float32Regs,
- !eq(name, "f64"): Float64Regs,
- !eq(name, "ai32"): Int32ArgRegs,
- !eq(name, "ai64"): Int64ArgRegs,
- !eq(name, "af32"): Float32ArgRegs,
- !eq(name, "if64"): Float64ArgRegs,
- );
-}
-
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
@@ -219,18 +196,18 @@ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
int Size = ty.Size;
}
-def I1RT : RegTyInfo<i1, Int1Regs, i1imm, imm>;
-def I16RT : RegTyInfo<i16, Int16Regs, i16imm, imm>;
-def I32RT : RegTyInfo<i32, Int32Regs, i32imm, imm>;
-def I64RT : RegTyInfo<i64, Int64Regs, i64imm, imm>;
+def I1RT : RegTyInfo<i1, B1, i1imm, imm>;
+def I16RT : RegTyInfo<i16, B16, i16imm, imm>;
+def I32RT : RegTyInfo<i32, B32, i32imm, imm>;
+def I64RT : RegTyInfo<i64, B64, i64imm, imm>;
-def F32RT : RegTyInfo<f32, Float32Regs, f32imm, fpimm>;
-def F64RT : RegTyInfo<f64, Float64Regs, f64imm, fpimm>;
-def F16RT : RegTyInfo<f16, Int16Regs, f16imm, fpimm, supports_imm = 0>;
-def BF16RT : RegTyInfo<bf16, Int16Regs, bf16imm, fpimm, supports_imm = 0>;
+def F32RT : RegTyInfo<f32, B32, f32imm, fpimm>;
+def F64RT : RegTyInfo<f64, B64, f64imm, fpimm>;
+def F16RT : RegTyInfo<f16, B16, f16imm, fpimm, supports_imm = 0>;
+def BF16RT : RegTyInfo<bf16, B16, bf16imm, fpimm, supports_imm = 0>;
-def F16X2RT : RegTyInfo<v2f16, Int32Regs, ?, ?, supports_imm = 0>;
-def BF16X2RT : RegTyInfo<v2bf16, Int32Regs, ?, ?, supports_imm = 0>;
+def F16X2RT : RegTyInfo<v2f16, B32, ?, ?, supports_imm = 0>;
+def BF16X2RT : RegTyInfo<v2bf16, B32, ?, ?, supports_imm = 0>;
// This class provides a basic wrapper around an NVPTXInst that abstracts the
@@ -238,18 +215,18 @@ def BF16X2RT : RegTyInfo<v2bf16, Int32Regs, ?, ?, supports_imm = 0>;
// construction of the asm string based on the provided dag arguments.
// For example, the following asm-strings would be computed:
//
-// * BasicFlagsNVPTXInst<(outs Int32Regs:$dst),
-// (ins Int32Regs:$a, Int32Regs:$b), (ins),
+// * BasicFlagsNVPTXInst<(outs B32:$dst),
+// (ins B32:$a, B32:$b), (ins),
// "add.s32">;
// ---> "add.s32 \t$dst, $a, $b;"
//
-// * BasicFlagsNVPTXInst<(outs Int32Regs:$d),
-// (ins Int32Regs:$a, Int32Regs:$b, Hexu32imm:$c),
+// * BasicFlagsNVPTXInst<(outs B32:$d),
+// (ins B32:$a, B32:$b, Hexu32imm:$c),
// (ins PrmtMode:$mode),
// "prmt.b32${mode}">;
// ---> "prmt.b32${mode} \t$d, $a, $b, $c;"
//
-// * BasicFlagsNVPTXInst<(outs Int64Regs:$state),
+// * BasicFlagsNVPTXInst<(outs B64:$state),
// (ins ADDR:$addr),
// "mbarrier.arrive.b64">;
// ---> "mbarrier.arrive.b64 \t$state, [$addr];"
@@ -312,7 +289,7 @@ multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
}
class I16x2<string OpcStr, SDNode OpNode> :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
OpcStr # "16x2",
[(set v2i16:$dst, (OpNode v2i16:$a, v2i16:$b))]>,
Requires<[hasPTX<80>, hasSM<90>]>;
@@ -334,73 +311,73 @@ multiclass ADD_SUB_INT_CARRY<string op_str, SDNode op_node, bit commutative> {
multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
if !not(NaN) then {
def f64rr :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$a, B64:$b),
OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a, f64:$b))]>;
def f64ri :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$a, f64imm:$b),
OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>;
}
def f32rr_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
OpcStr # ".ftz.f32",
[(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
Requires<[doF32FTZ]>;
def f32ri_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
OpcStr # ".ftz.f32",
[(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
Requires<[doF32FTZ]>;
def f32rr :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
OpcStr # ".f32",
[(set f32:$dst, (OpNode f32:$a, f32:$b))]>;
def f32ri :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
OpcStr # ".f32",
[(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>;
def f16rr_ftz :
- BasicNVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst),
+ (ins B16:$a, B16:$b),
OpcStr # ".ftz.f16",
[(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16rr :
- BasicNVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst),
+ (ins B16:$a, B16:$b),
OpcStr # ".f16",
[(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
def f16x2rr_ftz :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
OpcStr # ".ftz.f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>;
def f16x2rr :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
OpcStr # ".f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
def bf16rr :
- BasicNVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst),
+ (ins B16:$a, B16:$b),
OpcStr # ".bf16",
[(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
def bf16x2rr :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
OpcStr # ".bf16x2",
[(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
@@ -417,73 +394,73 @@ multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
// just like the non ".rn" op, but prevents ptxas from creating FMAs.
multiclass F3<string op_str, SDPatternOperator op_pat> {
def f64rr :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$a, B64:$b),
op_str # ".f64",
[(set f64:$dst, (op_pat f64:$a, f64:$b))]>;
def f64ri :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$a, f64imm:$b),
op_str # ".f64",
[(set f64:$dst, (op_pat f64:$a, fpimm:$b))]>;
def f32rr_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
op_str # ".ftz.f32",
[(set f32:$dst, (op_pat f32:$a, f32:$b))]>,
Requires<[doF32FTZ]>;
def f32ri_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
op_str # ".ftz.f32",
[(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>,
Requires<[doF32FTZ]>;
def f32rr :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
op_str # ".f32",
[(set f32:$dst, (op_pat f32:$a, f32:$b))]>;
def f32ri :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
op_str # ".f32",
[(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
def f16rr_ftz :
- BasicNVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst),
+ (ins B16:$a, B16:$b),
op_str # ".ftz.f16",
[(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16rr :
- BasicNVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst),
+ (ins B16:$a, B16:$b),
op_str # ".f16",
[(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
Requires<[useFP16Math]>;
def f16x2rr_ftz :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
op_str # ".ftz.f16x2",
[(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16x2rr :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
op_str # ".f16x2",
[(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math]>;
def bf16rr :
- BasicNVPTXInst<(outs Int16Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst),
+ (ins B16:$a, B16:$b),
op_str # ".bf16",
[(set bf16:$dst, (op_pat bf16:$a, bf16:$b))]>,
Requires<[hasBF16Math]>;
def bf16x2rr :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
op_str # ".bf16x2",
[(set v2bf16:$dst, (op_pat v2bf16:$a, v2bf16:$b))]>,
Requires<[hasBF16Math]>;
@@ -504,40 +481,40 @@ multiclass F3_fma_component<string op_str, SDNode op_node> {
// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
// subnormal inputs and results to zero).
multiclass F2<string OpcStr, SDNode OpNode> {
- def f64 : BasicNVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
+ def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a),
OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a))]>;
- def f32_ftz : BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ def f32_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
OpcStr # ".ftz.f32",
[(set f32:$dst, (OpNode f32:$a))]>,
Requires<[doF32FTZ]>;
- def f32 : BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
+ def f32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
OpcStr # ".f32",
[(set f32:$dst, (OpNode f32:$a))]>;
}
multiclass F2_Support_Half<string OpcStr, SDNode OpNode> {
- def bf16 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ def bf16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
OpcStr # ".bf16",
[(set bf16:$dst, (OpNode bf16:$a))]>,
Requires<[hasSM<80>, hasPTX<70>]>;
- def bf16x2 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ def bf16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
OpcStr # ".bf16x2",
[(set v2bf16:$dst, (OpNode v2bf16:$a))]>,
Requires<[hasSM<80>, hasPTX<70>]>;
- def f16_ftz : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ def f16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
OpcStr # ".ftz.f16",
[(set f16:$dst, (OpNode f16:$a))]>,
Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
- def f16x2_ftz : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ def f16x2_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
OpcStr # ".ftz.f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a))]>,
Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
- def f16 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ def f16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
OpcStr # ".f16",
[(set f16:$dst, (OpNode f16:$a))]>,
Requires<[hasSM<53>, hasPTX<65>]>;
- def f16x2 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ def f16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
OpcStr # ".f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a))]>,
Requires<[hasSM<53>, hasPTX<65>]>;
@@ -546,11 +523,11 @@ multiclass F2_Support_Half<string OpcStr, SDNode OpNode> {
// Variant where only .ftz.bf16 is supported.
multiclass F2_Support_Half_BF<string OpcStr, SDNode OpNode> {
- def bf16_ftz : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a),
+ def bf16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
OpcStr # ".ftz.bf16",
[(set bf16:$dst, (OpNode bf16:$a))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
- def bf16x2_ftz: BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ def bf16x2_ftz: BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
OpcStr # ".ftz.bf16x2",
[(set v2bf16:$dst, (OpNode v2bf16:$a))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
@@ -571,52 +548,52 @@ let hasSideEffects = false in {
multiclass CVT_FROM_ALL<string ToType, RegisterClass RC, list<Predicate> Preds = []> {
def _s8 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s8">,
Requires<Preds>;
def _u8 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u8">,
Requires<Preds>;
def _s16 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s16">,
Requires<Preds>;
def _u16 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u16">,
Requires<Preds>;
def _s32 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int32Regs:$src), (ins CvtMode:$mode),
+ (ins B32:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s32">,
Requires<Preds>;
def _u32 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int32Regs:$src), (ins CvtMode:$mode),
+ (ins B32:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u32">,
Requires<Preds>;
def _s64 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int64Regs:$src), (ins CvtMode:$mode),
+ (ins B64:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".s64">,
Requires<Preds>;
def _u64 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int64Regs:$src), (ins CvtMode:$mode),
+ (ins B64:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".u64">,
Requires<Preds>;
def _f16 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".f16">,
Requires<Preds>;
def _bf16 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}." # ToType # ".bf16">,
Requires<!if(!eq(ToType, "f32"),
// bf16->f32 was introduced early.
@@ -625,7 +602,7 @@ let hasSideEffects = false in {
[hasPTX<78>, hasSM<90>])>;
def _f32 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Float32Regs:$src), (ins CvtMode:$mode),
+ (ins B32:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}." # ToType # ".f32">,
Requires<!if(!eq(ToType, "bf16"),
// f32->bf16 was introduced early.
@@ -633,61 +610,61 @@ let hasSideEffects = false in {
Preds)>;
def _f64 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Float64Regs:$src), (ins CvtMode:$mode),
+ (ins B64:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:ftz}${mode:sat}." # ToType # ".f64">,
Requires<Preds>;
}
// Generate cvts from all types to all types.
- defm CVT_s8 : CVT_FROM_ALL<"s8", Int16Regs>;
- defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>;
- defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
- defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
- defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
- defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
- defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
- defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
- defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
- defm CVT_bf16 : CVT_FROM_ALL<"bf16", Int16Regs, [hasPTX<78>, hasSM<90>]>;
- defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
- defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
+ defm CVT_s8 : CVT_FROM_ALL<"s8", B16>;
+ defm CVT_u8 : CVT_FROM_ALL<"u8", B16>;
+ defm CVT_s16 : CVT_FROM_ALL<"s16", B16>;
+ defm CVT_u16 : CVT_FROM_ALL<"u16", B16>;
+ defm CVT_s32 : CVT_FROM_ALL<"s32", B32>;
+ defm CVT_u32 : CVT_FROM_ALL<"u32", B32>;
+ defm CVT_s64 : CVT_FROM_ALL<"s64", B64>;
+ defm CVT_u64 : CVT_FROM_ALL<"u64", B64>;
+ defm CVT_f16 : CVT_FROM_ALL<"f16", B16>;
+ defm CVT_bf16 : CVT_FROM_ALL<"bf16", B16, [hasPTX<78>, hasSM<90>]>;
+ defm CVT_f32 : CVT_FROM_ALL<"f32", B32>;
+ defm CVT_f64 : CVT_FROM_ALL<"f64", B64>;
// These cvts are different from those above: The source and dest registers
// are of the same type.
- def CVT_INREG_s16_s8 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+ def CVT_INREG_s16_s8 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$src),
"cvt.s16.s8">;
- def CVT_INREG_s32_s8 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ def CVT_INREG_s32_s8 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
"cvt.s32.s8">;
- def CVT_INREG_s32_s16 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+ def CVT_INREG_s32_s16 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
"cvt.s32.s16">;
- def CVT_INREG_s64_s8 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ def CVT_INREG_s64_s8 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src),
"cvt.s64.s8">;
- def CVT_INREG_s64_s16 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ def CVT_INREG_s64_s16 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src),
"cvt.s64.s16">;
- def CVT_INREG_s64_s32 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+ def CVT_INREG_s64_s32 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src),
"cvt.s64.s32">;
multiclass CVT_FROM_FLOAT_V2_SM80<string FromName, RegisterClass RC> {
def _f32 :
BasicFlagsNVPTXInst<(outs RC:$dst),
- (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode),
+ (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
"cvt${mode:base}${mode:relu}." # FromName # ".f32">,
Requires<[hasPTX<70>, hasSM<80>]>;
}
- defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>;
- defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>;
+ defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", B32>;
+ defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", B32>;
// FP8 conversions.
multiclass CVT_TO_F8X2<string F8Name> {
def _f32 :
- BasicFlagsNVPTXInst<(outs Int16Regs:$dst),
- (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode),
+ BasicFlagsNVPTXInst<(outs B16:$dst),
+ (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
"cvt${mode:base}.satfinite${mode:relu}." # F8Name # "x2.f32">,
Requires<[hasPTX<81>, hasSM<89>]>;
def _f16x2 :
- BasicFlagsNVPTXInst<(outs Int16Regs:$dst),
- (ins Int32Regs:$src), (ins CvtMode:$mode),
+ BasicFlagsNVPTXInst<(outs B16:$dst),
+ (ins B32:$src), (ins CvtMode:$mode),
"cvt${mode:base}.satfinite${mode:relu}." # F8Name # "x2.f16x2">,
Requires<[hasPTX<81>, hasSM<89>]>;
}
@@ -696,8 +673,8 @@ let hasSideEffects = false in {
defm CVT_e5m2x2 : CVT_TO_F8X2<"e5m2">;
class CVT_f16x2_fp8<string F8Name> :
- BasicFlagsNVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:relu}.f16x2." # F8Name # "x2">,
Requires<[hasPTX<81>, hasSM<89>]>;
@@ -708,7 +685,7 @@ let hasSideEffects = false in {
multiclass CVT_TO_TF32<string Modifier, list<Predicate> Preds = [hasPTX<78>, hasSM<90>]> {
defvar Intr = !cast<Intrinsic>("int_nvvm_f2tf32_" # !subst(".", "_", Modifier));
- def NAME : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$src),
+ def NAME : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
"cvt." # Modifier # ".tf32.f32",
[(set i32:$dst, (Intr f32:$src))]>,
Requires<Preds>;
@@ -728,25 +705,25 @@ let hasSideEffects = false in {
// FP6 conversions.
foreach type = ["e2m3x2", "e3m2x2"] in {
- def CVT_ # type # _f32_sf : BasicFlagsNVPTXInst<(outs Int16Regs:$dst),
- (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode),
+ def CVT_ # type # _f32_sf : BasicFlagsNVPTXInst<(outs B16:$dst),
+ (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
"cvt${mode:base}.satfinite${mode:relu}." # type # ".f32">;
- def CVT_f16x2_ # type : BasicFlagsNVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$src), (ins CvtMode:$mode),
+ def CVT_f16x2_ # type : BasicFlagsNVPTXInst<(outs B32:$dst),
+ (ins B16:$src), (ins CvtMode:$mode),
"cvt${mode:base}${mode:relu}.f16x2." # type>;
}
// FP4 conversions.
- def CVT_e2m1x2_f32_sf : NVPTXInst<(outs Int16Regs:$dst),
- (ins Float32Regs:$src1, Float32Regs:$src2, CvtMode:$mode),
+ def CVT_e2m1x2_f32_sf : NVPTXInst<(outs B16:$dst),
+ (ins B32:$src1, B32:$src2, CvtMode:$mode),
!strconcat("{{ \n\t",
".reg .b8 \t%e2m1x2_out; \n\t",
"cvt${mode:base}.satfinite${mode:relu}.e2m1x2.f32 \t%e2m1x2_out, $src1, $src2; \n\t",
"cvt.u16.u8 \t$dst, %e2m1x2_out; \n\t",
"}}"), []>;
- def CVT_f16x2_e2m1x2 : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$src, CvtMode:$mode),
+ def CVT_f16x2_e2m1x2 : NVPTXInst<(outs B32:$dst),
+ (ins B16:$src, CvtMode:$mode),
!strconcat("{{ \n\t",
".reg .b8 \t%e2m1x2_in; \n\t",
"cvt.u8.u16 \t%e2m1x2_in, $src; \n\t",
@@ -755,13 +732,13 @@ let hasSideEffects = false in {
// UE8M0x2 conversions.
class CVT_f32_to_ue8m0x2<string sat = ""> :
- BasicFlagsNVPTXInst<(outs Int16Regs:$dst),
- (ins Float32Regs:$src1, Float32Regs:$src2), (ins CvtMode:$mode),
+ BasicFlagsNVPTXInst<(outs B16:$dst),
+ (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
"cvt${mode:base}" # sat # ".ue8m0x2.f32">;
class CVT_bf16x2_to_ue8m0x2<string sat = ""> :
- BasicFlagsNVPTXInst<(outs Int16Regs:$dst),
- (ins Int32Regs:$src), (ins CvtMode:$mode),
+ BasicFlagsNVPTXInst<(outs B16:$dst),
+ (ins B32:$src), (ins CvtMode:$mode),
"cvt${mode:base}" # sat # ".ue8m0x2.bf16x2">;
def CVT_ue8m0x2_f32 : CVT_f32_to_ue8m0x2;
@@ -770,8 +747,8 @@ let hasSideEffects = false in {
def CVT_ue8m0x2_bf16x2_sf : CVT_bf16x2_to_ue8m0x2<".satfinite">;
def CVT_bf16x2_ue8m0x2 :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$src),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B16:$src),
"cvt.rn.bf16x2.ue8m0x2">;
}
@@ -800,22 +777,22 @@ let hasSideEffects = false in {
defvar asm_str = "selp." # TypeStr;
def rr :
BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.RC:$a, t.RC:$b, Int1Regs:$p),
+ (ins t.RC:$a, t.RC:$b, B1:$p),
asm_str,
[(set t.Ty:$dst, (select i1:$p, t.Ty:$a, t.Ty:$b))]>;
def ri :
BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.RC:$a, t.Imm:$b, Int1Regs:$p),
+ (ins t.RC:$a, t.Imm:$b, B1:$p),
asm_str,
[(set t.Ty:$dst, (select i1:$p, t.Ty:$a, t.ImmNode:$b))]>;
def ir :
BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.Imm:$a, t.RC:$b, Int1Regs:$p),
+ (ins t.Imm:$a, t.RC:$b, B1:$p),
asm_str,
[(set t.Ty:$dst, (select i1:$p, t.ImmNode:$a, t.Ty:$b))]>;
def ii :
BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.Imm:$a, t.Imm:$b, Int1Regs:$p),
+ (ins t.Imm:$a, t.Imm:$b, B1:$p),
asm_str,
[(set t.Ty:$dst, (select i1:$p, t.ImmNode:$a, t.ImmNode:$b))]>;
}
@@ -833,7 +810,7 @@ defm SELP_f64 : SELP_PATTERN<"f64", F64RT>;
// This does not work as tablegen fails to infer the type of 'imm'.
// def v2f16imm : Operand<v2f16>;
-// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>;
+// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, B32, v2f16imm, imm>;
foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
def : Pat<(vt (select i1:$p, vt:$a, vt:$b)),
@@ -846,10 +823,10 @@ def : Pat<(vt (select i1:$p, vt:$a, vt:$b)),
def fabs_oneuse : OneUse1<fabs>;
-def TESTINF_f32r : BasicNVPTXInst<(outs Int1Regs:$p), (ins Float32Regs:$a),
+def TESTINF_f32r : BasicNVPTXInst<(outs B1:$p), (ins B32:$a),
"testp.infinite.f32",
[(set i1:$p, (seteq (fabs_oneuse f32:$a), fpimm_pos_inf<f32>))]>;
-def TESTINF_f64r : BasicNVPTXInst<(outs Int1Regs:$p), (ins Float64Regs:$a),
+def TESTINF_f64r : BasicNVPTXInst<(outs B1:$p), (ins B64:$a),
"testp.infinite.f64",
[(set i1:$p, (seteq (fabs_oneuse f64:$a), fpimm_pos_inf<f64>))]>;
@@ -893,9 +870,9 @@ multiclass ABS<ValueType T, RegisterClass RC, string SizeName> {
"abs" # SizeName,
[(set T:$dst, (abs T:$a))]>;
}
-defm ABS_16 : ABS<i16, Int16Regs, ".s16">;
-defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
-defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
+defm ABS_16 : ABS<i16, B16, ".s16">;
+defm ABS_32 : ABS<i32, B32, ".s32">;
+defm ABS_64 : ABS<i64, B64, ".s64">;
// Integer min/max.
defm SMAX : I3<"max.s", smax, commutative = true>;
@@ -913,44 +890,32 @@ def UMIN16x2 : I16x2<"min.u", umin>;
// Wide multiplication
//
def MULWIDES64 :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- "mul.wide.s32">;
+ BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, B32:$b), "mul.wide.s32">;
def MULWIDES64Imm :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- "mul.wide.s32">;
+ BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i32imm:$b), "mul.wide.s32">;
def MULWIDES64Imm64 :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
- "mul.wide.s32">;
+ BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i64imm:$b), "mul.wide.s32">;
def MULWIDEU64 :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- "mul.wide.u32">;
+ BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, B32:$b), "mul.wide.u32">;
def MULWIDEU64Imm :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
- "mul.wide.u32">;
+ BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i32imm:$b), "mul.wide.u32">;
def MULWIDEU64Imm64 :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
- "mul.wide.u32">;
+ BasicNVPTXInst<(outs B64:$dst), (ins B32:$a, i64imm:$b), "mul.wide.u32">;
def MULWIDES32 :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- "mul.wide.s16">;
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, B16:$b), "mul.wide.s16">;
def MULWIDES32Imm :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- "mul.wide.s16">;
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i16imm:$b), "mul.wide.s16">;
def MULWIDES32Imm32 :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
- "mul.wide.s16">;
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i32imm:$b), "mul.wide.s16">;
def MULWIDEU32 :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
- "mul.wide.u16">;
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, B16:$b), "mul.wide.u16">;
def MULWIDEU32Imm :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
- "mul.wide.u16">;
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i16imm:$b), "mul.wide.u16">;
def MULWIDEU32Imm32 :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
- "mul.wide.u16">;
+ BasicNVPTXInst<(outs B32:$dst), (ins B16:$a, i32imm:$b), "mul.wide.u16">;
def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
@@ -1088,9 +1053,9 @@ multiclass MAD<string Ptx, ValueType VT, NVPTXRegClass Reg, Operand Imm> {
}
let Predicates = [hasOptEnabled] in {
-defm MAD16 : MAD<"mad.lo.s16", i16, Int16Regs, i16imm>;
-defm MAD32 : MAD<"mad.lo.s32", i32, Int32Regs, i32imm>;
-defm MAD64 : MAD<"mad.lo.s64", i64, Int64Regs, i64imm>;
+defm MAD16 : MAD<"mad.lo.s16", i16, B16, i16imm>;
+defm MAD32 : MAD<"mad.lo.s32", i32, B32, i32imm>;
+defm MAD64 : MAD<"mad.lo.s64", i64, B64, i64imm>;
}
foreach t = [I16RT, I32RT, I64RT] in {
@@ -1146,10 +1111,10 @@ class FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pre
OpcStr,
[(set T:$dst, (fneg T:$src))]>,
Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;
-def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>;
-def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>;
-def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
-def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>;
+def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, B16, doF32FTZ>;
+def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, B16, True>;
+def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, B32, doF32FTZ>;
+def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, B32, True>;
//
// BF16 NEG
@@ -1160,27 +1125,27 @@ class FNEG_BF16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pr
OpcStr,
[(set T:$dst, (fneg T:$src))]>,
Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>;
-def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
-def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, Int16Regs, True>;
-def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
-def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
+def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, B16, doF32FTZ>;
+def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, B16, True>;
+def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, B32, doF32FTZ>;
+def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, B32, True>;
//
// F64 division
//
def FRCP64r :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$b),
"rcp.rn.f64",
[(set f64:$dst, (fdiv f64imm_1, f64:$b))]>;
def FDIV64rr :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, Float64Regs:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$a, B64:$b),
"div.rn.f64",
[(set f64:$dst, (fdiv f64:$a, f64:$b))]>;
def FDIV64ri :
- BasicNVPTXInst<(outs Float64Regs:$dst),
- (ins Float64Regs:$a, f64imm:$b),
+ BasicNVPTXInst<(outs B64:$dst),
+ (ins B64:$a, f64imm:$b),
"div.rn.f64",
[(set f64:$dst, (fdiv f64:$a, fpimm:$b))]>;
@@ -1200,14 +1165,14 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
def FRCP32_approx_r_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$b),
"rcp.approx.ftz.f32",
[(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
Requires<[doF32FTZ]>;
def FRCP32_approx_r :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$b),
"rcp.approx.f32",
[(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
@@ -1215,25 +1180,25 @@ def FRCP32_approx_r :
// F32 Approximate division
//
def FDIV32approxrr_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
"div.approx.ftz.f32",
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
Requires<[doF32FTZ]>;
def FDIV32approxri_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
"div.approx.ftz.f32",
[(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
Requires<[doF32FTZ]>;
def FDIV32approxrr :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
"div.approx.f32",
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
def FDIV32approxri :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
"div.approx.f32",
[(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
//
@@ -1259,25 +1224,25 @@ def : Pat<(fdiv_full f32imm_1, f32:$b),
// F32 Semi-accurate division
//
def FDIV32rr_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
"div.full.ftz.f32",
[(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
Requires<[doF32FTZ]>;
def FDIV32ri_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
"div.full.ftz.f32",
[(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
Requires<[doF32FTZ]>;
def FDIV32rr :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
"div.full.f32",
[(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
def FDIV32ri :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
"div.full.f32",
[(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
//
@@ -1290,39 +1255,39 @@ def fdiv_ftz : PatFrag<(ops node:$a, node:$b),
}]>;
def FRCP32r_prec_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$b),
"rcp.rn.ftz.f32",
[(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>,
Requires<[doF32FTZ]>;
def FRCP32r_prec :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$b),
"rcp.rn.f32",
[(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
//
// F32 Accurate division
//
def FDIV32rr_prec_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
"div.rn.ftz.f32",
[(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>,
Requires<[doF32FTZ]>;
def FDIV32ri_prec_ftz :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
"div.rn.ftz.f32",
[(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>,
Requires<[doF32FTZ]>;
def FDIV32rr_prec :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b),
"div.rn.f32",
[(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
def FDIV32ri_prec :
- BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, f32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, f32imm:$b),
"div.rn.f32",
[(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
@@ -1378,10 +1343,10 @@ class UnaryOpAllowsApproxFn<SDPatternOperator operator>
return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
}]>;
-def SINF: BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+def SINF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
"sin.approx.f32",
[(set f32:$dst, (UnaryOpAllowsApproxFn<fsin> f32:$src))]>;
-def COSF: BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+def COSF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
"cos.approx.f32",
[(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;
@@ -1433,16 +1398,16 @@ foreach vt = [v2i16, v4i8] in {
(ANDb32ri $a, imm:$b)>;
}
-def NOT1 : BasicNVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
+def NOT1 : BasicNVPTXInst<(outs B1:$dst), (ins B1:$src),
"not.pred",
[(set i1:$dst, (not i1:$src))]>;
-def NOT16 : BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+def NOT16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$src),
"not.b16",
[(set i16:$dst, (not i16:$src))]>;
-def NOT32 : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+def NOT32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
"not.b32",
[(set i32:$dst, (not i32:$src))]>;
-def NOT64 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+def NOT64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$src),
"not.b64",
[(set i64:$dst, (not i64:$src))]>;
@@ -1453,31 +1418,31 @@ def NOT64 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
// This template also defines a 32-bit shift (imm, imm) instruction.
multiclass SHIFT<string OpcStr, SDNode OpNode> {
def i64rr :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, B32:$b),
OpcStr # "64",
[(set i64:$dst, (OpNode i64:$a, i32:$b))]>;
def i64ri :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+ BasicNVPTXInst<(outs B64:$dst), (ins B64:$a, i32imm:$b),
OpcStr # "64",
[(set i64:$dst, (OpNode i64:$a, (i32 imm:$b)))]>;
def i32rr :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
OpcStr # "32",
[(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
def i32ri :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, i32imm:$b),
OpcStr # "32",
[(set i32:$dst, (OpNode i32:$a, (i32 imm:$b)))]>;
def i32ii :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+ BasicNVPTXInst<(outs B32:$dst), (ins i32imm:$a, i32imm:$b),
OpcStr # "32",
[(set i32:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
def i16rr :
- BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
+ BasicNVPTXInst<(outs B16:$dst), (ins B16:$a, B32:$b),
OpcStr # "16",
[(set i16:$dst, (OpNode i16:$a, i32:$b))]>;
def i16ri :
- BasicNVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ BasicNVPTXInst<(outs B16:$dst), (ins B16:$a, i32imm:$b),
OpcStr # "16",
[(set i16:$dst, (OpNode i16:$a, (i32 imm:$b)))]>;
}
@@ -1488,11 +1453,11 @@ defm SRL : SHIFT<"shr.u", srl>;
// Bit-reverse
def BREV32 :
- BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
"brev.b32",
[(set i32:$dst, (bitreverse i32:$a))]>;
def BREV64 :
- BasicNVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
+ BasicNVPTXInst<(outs B64:$dst), (ins B64:$a),
"brev.b64",
[(set i64:$dst, (bitreverse i64:$a))]>;
@@ -1525,12 +1490,12 @@ def prmt : SDNode<"NVPTXISD::PRMT", SDTPRMT>;
multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
def rrr
: BasicNVPTXInst<(outs RC:$d),
- (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
+ (ins RC:$a, B32:$b, B32:$c),
Instr,
[(set T:$d, (bfe T:$a, i32:$b, i32:$c))]>;
def rri
: BasicNVPTXInst<(outs RC:$d),
- (ins RC:$a, Int32Regs:$b, i32imm:$c),
+ (ins RC:$a, B32:$b, i32imm:$c),
Instr,
[(set T:$d, (bfe T:$a, i32:$b, imm:$c))]>;
def rii
@@ -1543,12 +1508,12 @@ multiclass BFE<string Instr, ValueType T, RegisterClass RC> {
multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
def rrrr
: BasicNVPTXInst<(outs RC:$f),
- (ins RC:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
+ (ins RC:$a, RC:$b, B32:$c, B32:$d),
Instr,
[(set T:$f, (bfi T:$a, T:$b, i32:$c, i32:$d))]>;
def rrri
: BasicNVPTXInst<(outs RC:$f),
- (ins RC:$a, RC:$b, Int32Regs:$c, i32imm:$d),
+ (ins RC:$a, RC:$b, B32:$c, i32imm:$d),
Instr,
[(set T:$f, (bfi T:$a, T:$b, i32:$c, imm:$d))]>;
def rrii
@@ -1558,12 +1523,12 @@ multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
[(set T:$f, (bfi T:$a, T:$b, imm:$c, imm:$d))]>;
def irrr
: BasicNVPTXInst<(outs RC:$f),
- (ins ImmCls:$a, RC:$b, Int32Regs:$c, Int32Regs:$d),
+ (ins ImmCls:$a, RC:$b, B32:$c, B32:$d),
Instr,
[(set T:$f, (bfi (T imm:$a), T:$b, i32:$c, i32:$d))]>;
def irri
: BasicNVPTXInst<(outs RC:$f),
- (ins ImmCls:$a, RC:$b, Int32Regs:$c, i32imm:$d),
+ (ins ImmCls:$a, RC:$b, B32:$c, i32imm:$d),
Instr,
[(set T:$f, (bfi (T imm:$a), T:$b, i32:$c, imm:$d))]>;
def irii
@@ -1582,35 +1547,35 @@ let hasSideEffects = false in {
// the same patterns, so the first one wins. Having unsigned byte extraction
// has the benefit of always having zero in unused bits, which makes some
// optimizations easier (e.g. no need to mask them).
- defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
- defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
- defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
- defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
+ defm BFE_U32 : BFE<"bfe.u32", i32, B32>;
+ defm BFE_S32 : BFE<"bfe.s32", i32, B32>;
+ defm BFE_U64 : BFE<"bfe.u64", i64, B64>;
+ defm BFE_S64 : BFE<"bfe.s64", i64, B64>;
- defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
- defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;
+ defm BFI_B32 : BFI<"bfi.b32", i32, B32, i32imm>;
+ defm BFI_B64 : BFI<"bfi.b64", i64, B64, i64imm>;
def PRMT_B32rrr
- : BasicFlagsNVPTXInst<(outs Int32Regs:$d),
- (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ : BasicFlagsNVPTXInst<(outs B32:$d),
+ (ins B32:$a, B32:$b, B32:$c),
(ins PrmtMode:$mode),
"prmt.b32$mode",
[(set i32:$d, (prmt i32:$a, i32:$b, i32:$c, imm:$mode))]>;
def PRMT_B32rri
- : BasicFlagsNVPTXInst<(outs Int32Regs:$d),
- (ins Int32Regs:$a, Int32Regs:$b, Hexu32imm:$c),
+ : BasicFlagsNVPTXInst<(outs B32:$d),
+ (ins B32:$a, B32:$b, Hexu32imm:$c),
(ins PrmtMode:$mode),
"prmt.b32$mode",
[(set i32:$d, (prmt i32:$a, i32:$b, imm:$c, imm:$mode))]>;
def PRMT_B32rii
- : BasicFlagsNVPTXInst<(outs Int32Regs:$d),
- (ins Int32Regs:$a, i32imm:$b, Hexu32imm:$c),
+ : BasicFlagsNVPTXInst<(outs B32:$d),
+ (ins B32:$a, i32imm:$b, Hexu32imm:$c),
(ins PrmtMode:$mode),
"prmt.b32$mode",
[(set i32:$d, (prmt i32:$a, imm:$b, imm:$c, imm:$mode))]>;
def PRMT_B32rir
- : BasicFlagsNVPTXInst<(outs Int32Regs:$d),
- (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
+ : BasicFlagsNVPTXInst<(outs B32:$d),
+ (ins B32:$a, i32imm:$b, B32:$c),
(ins PrmtMode:$mode),
"prmt.b32$mode",
[(set i32:$d, (prmt i32:$a, imm:$b, i32:$c, imm:$mode))]>;
@@ -1663,48 +1628,48 @@ def : Pat<(i16 (sext_inreg (trunc (srl i64:$s, (i32 imm:$o))), i8)),
let hasSideEffects = false in {
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr :
- BasicFlagsNVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}." # TypeStr>;
def ri :
- BasicFlagsNVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}." # TypeStr>;
def ir :
- BasicFlagsNVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}." # TypeStr>;
}
}
-defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
-defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
-defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
-defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
-defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
-defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
-defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
-defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
-defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
-defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
-defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
+defm SETP_b16 : SETP<"b16", B16, i16imm>;
+defm SETP_s16 : SETP<"s16", B16, i16imm>;
+defm SETP_u16 : SETP<"u16", B16, i16imm>;
+defm SETP_b32 : SETP<"b32", B32, i32imm>;
+defm SETP_s32 : SETP<"s32", B32, i32imm>;
+defm SETP_u32 : SETP<"u32", B32, i32imm>;
+defm SETP_b64 : SETP<"b64", B64, i64imm>;
+defm SETP_s64 : SETP<"s64", B64, i64imm>;
+defm SETP_u64 : SETP<"u64", B64, i64imm>;
+defm SETP_f32 : SETP<"f32", B32, f32imm>;
+defm SETP_f64 : SETP<"f64", B64, f64imm>;
def SETP_f16rr :
- BasicFlagsNVPTXInst<(outs Int1Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$dst),
+ (ins B16:$a, B16:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.f16">,
Requires<[useFP16Math]>;
def SETP_f16x2rr :
- BasicFlagsNVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
- (ins Int32Regs:$a, Int32Regs:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$p, B1:$q),
+ (ins B32:$a, B32:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.f16x2">,
Requires<[useFP16Math]>;
def SETP_bf16rr :
- BasicFlagsNVPTXInst<(outs Int1Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$dst),
+ (ins B16:$a, B16:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.bf16">,
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
def SETP_bf16x2rr :
- BasicFlagsNVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
- (ins Int32Regs:$a, Int32Regs:$b), (ins CmpMode:$cmp),
+ BasicFlagsNVPTXInst<(outs B1:$p, B1:$q),
+ (ins B32:$a, B32:$b), (ins CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.bf16x2">,
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
@@ -1739,18 +1704,18 @@ def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Load a memory address into a u32 or u64 register.
-def MOV_ADDR : BasicNVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
+def MOV_ADDR : BasicNVPTXInst<(outs B32:$dst), (ins ADDR_base:$a),
"mov.b32",
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
-def MOV_ADDR64 : BasicNVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
+def MOV_ADDR64 : BasicNVPTXInst<(outs B64:$dst), (ins ADDR_base:$a),
"mov.b64",
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack.
let hasSideEffects = false in {
- def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
+ def MOV_DEPOT_ADDR : NVPTXInst<(outs B32:$d), (ins i32imm:$num),
"mov.b32 \t$d, __local_depot$num;", []>;
- def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
+ def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs B64:$d), (ins i32imm:$num),
"mov.b64 \t$d, __local_depot$num;", []>;
}
@@ -1769,29 +1734,29 @@ let hasSideEffects = false, isAsCheapAsAMove = true in {
[(set VT:$dst, ImmNode:$src)]>;
}
-def IMOV1r : MOVr<Int1Regs, "pred">;
-def MOV16r : MOVr<Int16Regs, "b16">;
-def IMOV32r : MOVr<Int32Regs, "b32">;
-def IMOV64r : MOVr<Int64Regs, "b64">;
-def IMOV128r : MOVr<Int128Regs, "b128">;
+def IMOV1r : MOVr<B1, "pred">;
+def MOV16r : MOVr<B16, "b16">;
+def IMOV32r : MOVr<B32, "b32">;
+def IMOV64r : MOVr<B64, "b64">;
+def IMOV128r : MOVr<B128, "b128">;
-def IMOV1i : MOVi<Int1Regs, "pred", i1, i1imm, imm>;
-def IMOV16i : MOVi<Int16Regs, "b16", i16, i16imm, imm>;
-def IMOV32i : MOVi<Int32Regs, "b32", i32, i32imm, imm>;
-def IMOV64i : MOVi<Int64Regs, "b64", i64, i64imm, imm>;
-def FMOV16i : MOVi<Int16Regs, "b16", f16, f16imm, fpimm>;
-def BFMOV16i : MOVi<Int16Regs, "b16", bf16, bf16imm, fpimm>;
-def FMOV32i : MOVi<Float32Regs, "b32", f32, f32imm, fpimm>;
-def FMOV64i : MOVi<Float64Regs, "b64", f64, f64imm, fpimm>;
+def IMOV1i : MOVi<B1, "pred", i1, i1imm, imm>;
+def IMOV16i : MOVi<B16, "b16", i16, i16imm, imm>;
+def IMOV32i : MOVi<B32, "b32", i32, i32imm, imm>;
+def IMOV64i : MOVi<B64, "b64", i64, i64imm, imm>;
+def FMOV16i : MOVi<B16, "b16", f16, f16imm, fpimm>;
+def BFMOV16i : MOVi<B16, "b16", bf16, bf16imm, fpimm>;
+def FMOV32i : MOVi<B32, "b32", f32, f32imm, fpimm>;
+def FMOV64i : MOVi<B64, "b64", f64, f64imm, fpimm>;
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
//---- Copy Frame Index ----
-def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
+def LEA_ADDRi : NVPTXInst<(outs B32:$dst), (ins ADDR:$addr),
"add.u32 \t$dst, ${addr:add};", []>;
-def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR:$addr),
+def LEA_ADDRi64 : NVPTXInst<(outs B64:$dst), (ins ADDR:$addr),
"add.u64 \t$dst, ${addr:add};", []>;
def to_tframeindex : SDNodeXForm<frameindex, [{
@@ -1872,66 +1837,66 @@ defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
// comparisons of i8 extracted with BFE as i32
// It's faster to do comparison directly on i32 extracted by BFE,
// instead of the long conversion and sign extending.
-def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
+def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
+ (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
-def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
+def: Pat<(setgt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
+ (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
-def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
+def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
+ (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
-def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
+def: Pat<(setge (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
+ (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
-def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
+def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
+ (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
-def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
+def: Pat<(setlt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
+ (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
-def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
+def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
+ (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
-def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
+def: Pat<(setle (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
+ (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
-def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
+def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
-def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
+def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
-def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
+def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
-def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
+def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
-def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
+def: Pat<(setult (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
-def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
+def: Pat<(setult (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
-def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
+def: Pat<(setule (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
-def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
+def: Pat<(setule (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
-def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
+def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
-def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
+def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
-def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
+def: Pat<(setne (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
-def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
+def: Pat<(setne (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
// i1 compare -> i32
@@ -2228,56 +2193,56 @@ let isConvergent=1 in {
defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
}
-def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
-def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
-def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
-def LoadParamMemI8 : LoadParamMemInst<Int16Regs, ".b8">;
-def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
-def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
-def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
-def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
-def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
-def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
-def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
-
-defm StoreParamI64 : StoreParamInst<Int64Regs, i64imm, ".b64">;
-defm StoreParamI32 : StoreParamInst<Int32Regs, i32imm, ".b32">;
-defm StoreParamI16 : StoreParamInst<Int16Regs, i16imm, ".b16">;
-defm StoreParamI8 : StoreParamInst<Int16Regs, i8imm, ".b8">;
-
-defm StoreParamI8TruncI32 : StoreParamInst<Int32Regs, i8imm, ".b8", /* support_imm */ false>;
-defm StoreParamI8TruncI64 : StoreParamInst<Int64Regs, i8imm, ".b8", /* support_imm */ false>;
-
-defm StoreParamV2I64 : StoreParamV2Inst<Int64Regs, i64imm, ".b64">;
-defm StoreParamV2I32 : StoreParamV2Inst<Int32Regs, i32imm, ".b32">;
-defm StoreParamV2I16 : StoreParamV2Inst<Int16Regs, i16imm, ".b16">;
-defm StoreParamV2I8 : StoreParamV2Inst<Int16Regs, i8imm, ".b8">;
-
-defm StoreParamV4I32 : StoreParamV4Inst<Int32Regs, i32imm, ".b32">;
-defm StoreParamV4I16 : StoreParamV4Inst<Int16Regs, i16imm, ".b16">;
-defm StoreParamV4I8 : StoreParamV4Inst<Int16Regs, i8imm, ".b8">;
-
-defm StoreParamF32 : StoreParamInst<Float32Regs, f32imm, ".b32">;
-defm StoreParamF64 : StoreParamInst<Float64Regs, f64imm, ".b64">;
-
-defm StoreParamV2F32 : StoreParamV2Inst<Float32Regs, f32imm, ".b32">;
-defm StoreParamV2F64 : StoreParamV2Inst<Float64Regs, f64imm, ".b64">;
-
-defm StoreParamV4F32 : StoreParamV4Inst<Float32Regs, f32imm, ".b32">;
-
-def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
-def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
-def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
-def StoreRetvalI8 : StoreRetvalInst<Int16Regs, ".b8">;
-def StoreRetvalI8TruncI32 : StoreRetvalInst<Int32Regs, ".b8">;
-def StoreRetvalI8TruncI64 : StoreRetvalInst<Int64Regs, ".b8">;
-def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
-def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
-def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
-def StoreRetvalV2I8 : StoreRetvalV2Inst<Int16Regs, ".b8">;
-def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
-def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
-def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
+def LoadParamMemI64 : LoadParamMemInst<B64, ".b64">;
+def LoadParamMemI32 : LoadParamMemInst<B32, ".b32">;
+def LoadParamMemI16 : LoadParamMemInst<B16, ".b16">;
+def LoadParamMemI8 : LoadParamMemInst<B16, ".b8">;
+def LoadParamMemV2I64 : LoadParamV2MemInst<B64, ".b64">;
+def LoadParamMemV2I32 : LoadParamV2MemInst<B32, ".b32">;
+def LoadParamMemV2I16 : LoadParamV2MemInst<B16, ".b16">;
+def LoadParamMemV2I8 : LoadParamV2MemInst<B16, ".b8">;
+def LoadParamMemV4I32 : LoadParamV4MemInst<B32, ".b32">;
+def LoadParamMemV4I16 : LoadParamV4MemInst<B16, ".b16">;
+def LoadParamMemV4I8 : LoadParamV4MemInst<B16, ".b8">;
+
+defm StoreParamI64 : StoreParamInst<B64, i64imm, ".b64">;
+defm StoreParamI32 : StoreParamInst<B32, i32imm, ".b32">;
+defm StoreParamI16 : StoreParamInst<B16, i16imm, ".b16">;
+defm StoreParamI8 : StoreParamInst<B16, i8imm, ".b8">;
+
+defm StoreParamI8TruncI32 : StoreParamInst<B32, i8imm, ".b8", /* support_imm */ false>;
+defm StoreParamI8TruncI64 : StoreParamInst<B64, i8imm, ".b8", /* support_imm */ false>;
+
+defm StoreParamV2I64 : StoreParamV2Inst<B64, i64imm, ".b64">;
+defm StoreParamV2I32 : StoreParamV2Inst<B32, i32imm, ".b32">;
+defm StoreParamV2I16 : StoreParamV2Inst<B16, i16imm, ".b16">;
+defm StoreParamV2I8 : StoreParamV2Inst<B16, i8imm, ".b8">;
+
+defm StoreParamV4I32 : StoreParamV4Inst<B32, i32imm, ".b32">;
+defm StoreParamV4I16 : StoreParamV4Inst<B16, i16imm, ".b16">;
+defm StoreParamV4I8 : StoreParamV4Inst<B16, i8imm, ".b8">;
+
+defm StoreParamF32 : StoreParamInst<B32, f32imm, ".b32">;
+defm StoreParamF64 : StoreParamInst<B64, f64imm, ".b64">;
+
+defm StoreParamV2F32 : StoreParamV2Inst<B32, f32imm, ".b32">;
+defm StoreParamV2F64 : StoreParamV2Inst<B64, f64imm, ".b64">;
+
+defm StoreParamV4F32 : StoreParamV4Inst<B32, f32imm, ".b32">;
+
+def StoreRetvalI64 : StoreRetvalInst<B64, ".b64">;
+def StoreRetvalI32 : StoreRetvalInst<B32, ".b32">;
+def StoreRetvalI16 : StoreRetvalInst<B16, ".b16">;
+def StoreRetvalI8 : StoreRetvalInst<B16, ".b8">;
+def StoreRetvalI8TruncI32 : StoreRetvalInst<B32, ".b8">;
+def StoreRetvalI8TruncI64 : StoreRetvalInst<B64, ".b8">;
+def StoreRetvalV2I64 : StoreRetvalV2Inst<B64, ".b64">;
+def StoreRetvalV2I32 : StoreRetvalV2Inst<B32, ".b32">;
+def StoreRetvalV2I16 : StoreRetvalV2Inst<B16, ".b16">;
+def StoreRetvalV2I8 : StoreRetvalV2Inst<B16, ".b8">;
+def StoreRetvalV4I32 : StoreRetvalV4Inst<B32, ".b32">;
+def StoreRetvalV4I16 : StoreRetvalV4Inst<B16, ".b16">;
+def StoreRetvalV4I8 : StoreRetvalV4Inst<B16, ".b8">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2291,9 +2256,9 @@ def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
def CallVoidInst : NVPTXInst<(outs), (ins ADDR_base:$addr), "$addr, ",
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
-def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
+def CallVoidInstReg : NVPTXInst<(outs), (ins B32:$addr), "$addr, ",
[(CallVoid i32:$addr)]>;
-def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
+def CallVoidInstReg64 : NVPTXInst<(outs), (ins B64:$addr), "$addr, ",
[(CallVoid i64:$addr)]>;
def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
[(Prototype (i32 imm:$val))]>;
@@ -2337,11 +2302,11 @@ class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
"// Pseudo use of $src",
[(PseudoUseParam vt:$src)]>;
-def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs, i64>;
-def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs, i32>;
-def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs, i16>;
-def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs, f64>;
-def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs, f32>;
+def PseudoUseParamI64 : PseudoUseParamInst<B64, i64>;
+def PseudoUseParamI32 : PseudoUseParamInst<B32, i32>;
+def PseudoUseParamI16 : PseudoUseParamInst<B16, i16>;
+def PseudoUseParamF64 : PseudoUseParamInst<B64, f64>;
+def PseudoUseParamF32 : PseudoUseParamInst<B32, f32>;
multiclass ProxyRegInst<string SzStr, NVPTXRegClass rc> {
def NAME : BasicNVPTXInst<(outs rc:$dst), (ins rc:$src),
@@ -2350,10 +2315,10 @@ multiclass ProxyRegInst<string SzStr, NVPTXRegClass rc> {
def : Pat<(vt (ProxyReg vt:$src)), (!cast<NVPTXInst>(NAME) $src)>;
}
-defm ProxyRegB1 : ProxyRegInst<"pred", Int1Regs>;
-defm ProxyRegB16 : ProxyRegInst<"b16", Int16Regs>;
-defm ProxyRegB32 : ProxyRegInst<"b32", Int32Regs>;
-defm ProxyRegB64 : ProxyRegInst<"b64", Int64Regs>;
+defm ProxyRegB1 : ProxyRegInst<"pred", B1>;
+defm ProxyRegB16 : ProxyRegInst<"b16", B16>;
+defm ProxyRegB32 : ProxyRegInst<"b32", B32>;
+defm ProxyRegB64 : ProxyRegInst<"b64", B64>;
//
// Load / Store Handling
@@ -2367,10 +2332,10 @@ class LD<NVPTXRegClass regclass>
"\t$dst, [$addr];", []>;
let mayLoad=1, hasSideEffects=0 in {
- def LD_i8 : LD<Int16Regs>;
- def LD_i16 : LD<Int16Regs>;
- def LD_i32 : LD<Int32Regs>;
- def LD_i64 : LD<Int64Regs>;
+ def LD_i8 : LD<B16>;
+ def LD_i16 : LD<B16>;
+ def LD_i32 : LD<B32>;
+ def LD_i64 : LD<B64>;
}
class ST<NVPTXRegClass regclass>
@@ -2382,10 +2347,10 @@ class ST<NVPTXRegClass regclass>
" \t[$addr], $src;", []>;
let mayStore=1, hasSideEffects=0 in {
- def ST_i8 : ST<Int16Regs>;
- def ST_i16 : ST<Int16Regs>;
- def ST_i32 : ST<Int32Regs>;
- def ST_i64 : ST<Int64Regs>;
+ def ST_i8 : ST<B16>;
+ def ST_i16 : ST<B16>;
+ def ST_i32 : ST<B32>;
+ def ST_i64 : ST<B64>;
}
// The following is used only in and after vector elementizations. Vector
@@ -2415,10 +2380,10 @@ multiclass LD_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
"[$addr];", []>;
}
let mayLoad=1, hasSideEffects=0 in {
- defm LDV_i8 : LD_VEC<Int16Regs>;
- defm LDV_i16 : LD_VEC<Int16Regs>;
- defm LDV_i32 : LD_VEC<Int32Regs, support_v8 = true>;
- defm LDV_i64 : LD_VEC<Int64Regs>;
+ defm LDV_i8 : LD_VEC<B16>;
+ defm LDV_i16 : LD_VEC<B16>;
+ defm LDV_i32 : LD_VEC<B32, support_v8 = true>;
+ defm LDV_i64 : LD_VEC<B64>;
}
multiclass ST_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
@@ -2449,15 +2414,15 @@ multiclass ST_VEC<NVPTXRegClass regclass, bit support_v8 = false> {
}
let mayStore=1, hasSideEffects=0 in {
- defm STV_i8 : ST_VEC<Int16Regs>;
- defm STV_i16 : ST_VEC<Int16Regs>;
- defm STV_i32 : ST_VEC<Int32Regs, support_v8 = true>;
- defm STV_i64 : ST_VEC<Int64Regs>;
+ defm STV_i8 : ST_VEC<B16>;
+ defm STV_i16 : ST_VEC<B16>;
+ defm STV_i32 : ST_VEC<B32, support_v8 = true>;
+ defm STV_i64 : ST_VEC<B64>;
}
//---- Conversion ----
-foreach rc = [Int16Regs, Int32Regs, Int64Regs] in
+foreach rc = [B16, B32, B64] in
foreach ta = rc.RegTypes in
foreach tb = rc.RegTypes in
if !ne(ta, tb) then
@@ -2638,62 +2603,62 @@ def : Pat<(sext_inreg i64:$a, i32), (CVT_INREG_s64_s32 $a)>;
let hasSideEffects = false in {
// pack a set of smaller int registers to a larger int register
- def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
- (ins Int16Regs:$s1, Int16Regs:$s2,
- Int16Regs:$s3, Int16Regs:$s4),
+ def V4I16toI64 : NVPTXInst<(outs B64:$d),
+ (ins B16:$s1, B16:$s2,
+ B16:$s3, B16:$s4),
"mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>;
- def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
- (ins Int16Regs:$s1, Int16Regs:$s2),
+ def V2I16toI32 : NVPTXInst<(outs B32:$d),
+ (ins B16:$s1, B16:$s2),
"mov.b32 \t$d, {{$s1, $s2}};", []>;
- def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
- (ins Int32Regs:$s1, Int32Regs:$s2),
+ def V2I32toI64 : NVPTXInst<(outs B64:$d),
+ (ins B32:$s1, B32:$s2),
"mov.b64 \t$d, {{$s1, $s2}};", []>;
- def V2I64toI128 : NVPTXInst<(outs Int128Regs:$d),
- (ins Int64Regs:$s1, Int64Regs:$s2),
+ def V2I64toI128 : NVPTXInst<(outs B128:$d),
+ (ins B64:$s1, B64:$s2),
"mov.b128 \t$d, {{$s1, $s2}};", []>;
// unpack a larger int register to a set of smaller int registers
- def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
- Int16Regs:$d3, Int16Regs:$d4),
- (ins Int64Regs:$s),
+ def I64toV4I16 : NVPTXInst<(outs B16:$d1, B16:$d2,
+ B16:$d3, B16:$d4),
+ (ins B64:$s),
"mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>;
- def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
- (ins Int32Regs:$s),
+ def I32toV2I16 : NVPTXInst<(outs B16:$d1, B16:$d2),
+ (ins B32:$s),
"mov.b32 \t{{$d1, $d2}}, $s;", []>;
- def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
- (ins Int64Regs:$s),
+ def I64toV2I32 : NVPTXInst<(outs B32:$d1, B32:$d2),
+ (ins B64:$s),
"mov.b64 \t{{$d1, $d2}}, $s;", []>;
- def I128toV2I64: NVPTXInst<(outs Int64Regs:$d1, Int64Regs:$d2),
- (ins Int128Regs:$s),
+ def I128toV2I64: NVPTXInst<(outs B64:$d1, B64:$d2),
+ (ins B128:$s),
"mov.b128 \t{{$d1, $d2}}, $s;", []>;
- def I32toI16H : NVPTXInst<(outs Int16Regs:$high),
- (ins Int32Regs:$s),
+ def I32toI16H : NVPTXInst<(outs B16:$high),
+ (ins B32:$s),
"{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
[]>;
- def I32toI16L : NVPTXInst<(outs Int16Regs:$low),
- (ins Int32Regs:$s),
+ def I32toI16L : NVPTXInst<(outs B16:$low),
+ (ins B32:$s),
"{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
[]>;
- def I64toI32H : NVPTXInst<(outs Int32Regs:$high),
- (ins Int64Regs:$s),
+ def I64toI32H : NVPTXInst<(outs B32:$high),
+ (ins B64:$s),
"{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
[]>;
- def I64toI32L : NVPTXInst<(outs Int32Regs:$low),
- (ins Int64Regs:$s),
+ def I64toI32L : NVPTXInst<(outs B32:$low),
+ (ins B64:$s),
"{{ .reg .b32 tmp; mov.b64 {$low, tmp}, $s; }}",
[]>;
// PTX 7.1 lets you avoid a temp register and just use _ as a "sink" for the
// unused high/low part.
let Predicates = [hasPTX<71>] in {
- def I32toI16H_Sink : NVPTXInst<(outs Int16Regs:$high), (ins Int32Regs:$s),
+ def I32toI16H_Sink : NVPTXInst<(outs B16:$high), (ins B32:$s),
"mov.b32 \t{{_, $high}}, $s;", []>;
- def I32toI16L_Sink : NVPTXInst<(outs Int16Regs:$low), (ins Int32Regs:$s),
+ def I32toI16L_Sink : NVPTXInst<(outs B16:$low), (ins B32:$s),
"mov.b32 \t{{$low, _}}, $s;", []>;
- def I64toI32H_Sink : NVPTXInst<(outs Int32Regs:$high), (ins Int64Regs:$s),
+ def I64toI32H_Sink : NVPTXInst<(outs B32:$high), (ins B64:$s),
"mov.b64 \t{{_, $high}}, $s;", []>;
- def I64toI32L_Sink : NVPTXInst<(outs Int32Regs:$low), (ins Int64Regs:$s),
+ def I64toI32L_Sink : NVPTXInst<(outs B32:$low), (ins B64:$s),
"mov.b64 \t{{$low, _}}, $s;", []>;
}
}
@@ -2748,16 +2713,16 @@ def fshr_clamp : SDNode<"NVPTXISD::FSHR_CLAMP", SDTIntShiftDOp, []>;
let hasSideEffects = false in {
multiclass ShfInst<string mode, SDNode op> {
def _i
- : BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+ : BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$lo, B32:$hi, i32imm:$amt),
"shf." # mode # ".b32",
[(set i32:$dst,
(op i32:$hi, i32:$lo, (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
def _r
- : BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ : BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$lo, B32:$hi, B32:$amt),
"shf." # mode # ".b32",
[(set i32:$dst,
(op i32:$hi, i32:$lo, i32:$amt))]>,
@@ -2782,12 +2747,12 @@ def : Pat<(i32 (int_nvvm_fshr_clamp i32:$hi, i32:$lo, (i32 imm:$amt))),
let hasSideEffects = false in {
foreach RT = [I32RT, I64RT] in {
// Count leading zeros
- def CLZr # RT.Size : BasicNVPTXInst<(outs Int32Regs:$d), (ins RT.RC:$a),
+ def CLZr # RT.Size : BasicNVPTXInst<(outs B32:$d), (ins RT.RC:$a),
"clz.b" # RT.Size,
[(set i32:$d, (ctlz RT.Ty:$a))]>;
// Population count
- def POPCr # RT.Size : BasicNVPTXInst<(outs Int32Regs:$d), (ins RT.RC:$a),
+ def POPCr # RT.Size : BasicNVPTXInst<(outs B32:$d), (ins RT.RC:$a),
"popc.b" # RT.Size,
[(set i32:$d, (ctpop RT.Ty:$a))]>;
}
@@ -2862,11 +2827,11 @@ let isTerminator=1 in {
def Return : BasicNVPTXInst<(outs), (ins), "ret", [(retglue)]>;
let isBranch=1 in
- def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
+ def CBranch : NVPTXInst<(outs), (ins B1:$a, brtarget:$target),
"@$a bra \t$target;",
[(brcond i1:$a, bb:$target)]>;
let isBranch=1 in
- def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
+ def CBranchOther : NVPTXInst<(outs), (ins B1:$a, brtarget:$target),
"@!$a bra \t$target;", []>;
let isBranch=1, isBarrier=1 in
@@ -2987,7 +2952,7 @@ let isTerminator = 1, isBranch = 1, isIndirectBranch = 1, isNotDuplicable = 1 in
[(brx_item bb:$target)]>;
def BRX_END :
- NVPTXInst<(outs), (ins brtarget:$target, Int32Regs:$val, i32imm:$id),
+ NVPTXInst<(outs), (ins brtarget:$target, B32:$val, i32imm:$id),
"\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;",
[(brx_end bb:$target, i32:$val, (i32 imm:$id))]> {
let isBarrier = 1;
@@ -2999,8 +2964,8 @@ foreach a_type = ["s", "u"] in {
foreach b_type = ["s", "u"] in {
def DOT4_ # a_type # b_type :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b, B32:$c),
"dp4a." # a_type # "32." # b_type # "32",
[(set i32:$dst,
(!cast<Intrinsic>("int_nvvm_idp4a_" # a_type # "_" # b_type)
@@ -3011,8 +2976,8 @@ foreach a_type = ["s", "u"] in {
defvar lohi_suffix = !if(is_hi, "hi", "lo");
def DOT2_ # lohi_suffix # _ # a_type # b_type :
- BasicNVPTXInst<(outs Int32Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
+ BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$a, B32:$b, B32:$c),
"dp2a." # lohi_suffix # "." # a_type # "32." # b_type # "32",
[(set i32:$dst,
(!cast<Intrinsic>("int_nvvm_idp2a_" # a_type # "_" # b_type)
@@ -3124,12 +3089,12 @@ class NVPTXInst_rrr<RegisterClass RC, string Instruction, list<Predicate> Preds>
: BasicNVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), Instruction>,
Requires<Preds>;
-def FMARELU_F16 : NVPTXInst_rrr<Int16Regs, "fma.rn.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16_FTZ : NVPTXInst_rrr<Int16Regs, "fma.rn.ftz.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_BF16 : NVPTXInst_rrr<Int16Regs, "fma.rn.relu.bf16", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16X2 : NVPTXInst_rrr<Int32Regs, "fma.rn.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16X2_FTZ : NVPTXInst_rrr<Int32Regs, "fma.rn.ftz.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_BF16X2 : NVPTXInst_rrr<Int32Regs, "fma.rn.relu.bf16x2", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
+def FMARELU_F16 : NVPTXInst_rrr<B16, "fma.rn.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
+def FMARELU_F16_FTZ : NVPTXInst_rrr<B16, "fma.rn.ftz.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
+def FMARELU_BF16 : NVPTXInst_rrr<B16, "fma.rn.relu.bf16", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
+def FMARELU_F16X2 : NVPTXInst_rrr<B32, "fma.rn.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
+def FMARELU_F16X2_FTZ : NVPTXInst_rrr<B32, "fma.rn.ftz.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
+def FMARELU_BF16X2 : NVPTXInst_rrr<B32, "fma.rn.relu.bf16x2", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
// FTZ
def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 5de3dee1fb344..10d7f04d8d937 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -63,14 +63,14 @@ class RegSeq<int n, string prefix> {
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {
-def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+def INT_BARRIER0_POPC : NVPTXInst<(outs B32:$dst), (ins B32:$pred),
!strconcat("{{ \n\t",
".reg .pred \t%p1; \n\t",
"setp.ne.u32 \t%p1, $pred, 0; \n\t",
"bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
"}}"),
[(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>;
-def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+def INT_BARRIER0_AND : NVPTXInst<(outs B32:$dst), (ins B32:$pred),
!strconcat("{{ \n\t",
".reg .pred \t%p1; \n\t",
".reg .pred \t%p2; \n\t",
@@ -79,7 +79,7 @@ def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
"selp.u32 \t$dst, 1, 0, %p2; \n\t",
"}}"),
[(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>;
-def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
+def INT_BARRIER0_OR : NVPTXInst<(outs B32:$dst), (ins B32:$pred),
!strconcat("{{ \n\t",
".reg .pred \t%p1; \n\t",
".reg .pred \t%p2; \n\t",
@@ -92,7 +92,7 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
def INT_BAR_WARP_SYNC_I : BasicNVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync",
[(int_nvvm_bar_warp_sync imm:$i)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
-def INT_BAR_WARP_SYNC_R : BasicNVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync",
+def INT_BAR_WARP_SYNC_R : BasicNVPTXInst<(outs), (ins B32:$i), "bar.warp.sync",
[(int_nvvm_bar_warp_sync i32:$i)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
@@ -101,21 +101,21 @@ multiclass BARRIER1<string asmstr, Intrinsic intrinsic, list<Predicate> requires
[(intrinsic imm:$i)]>,
Requires<requires>;
- def _r : BasicNVPTXInst<(outs), (ins Int32Regs:$i), asmstr,
+ def _r : BasicNVPTXInst<(outs), (ins B32:$i), asmstr,
[(intrinsic i32:$i)]>,
Requires<requires>;
}
multiclass BARRIER2<string asmstr, Intrinsic intrinsic, list<Predicate> requires = []> {
- def _rr : BasicNVPTXInst<(outs), (ins Int32Regs:$i, Int32Regs:$j), asmstr,
+ def _rr : BasicNVPTXInst<(outs), (ins B32:$i, B32:$j), asmstr,
[(intrinsic i32:$i, i32:$j)]>,
Requires<requires>;
- def _ri : BasicNVPTXInst<(outs), (ins Int32Regs:$i, i32imm:$j), asmstr,
+ def _ri : BasicNVPTXInst<(outs), (ins B32:$i, i32imm:$j), asmstr,
[(intrinsic i32:$i, imm:$j)]>,
Requires<requires>;
- def _ir : BasicNVPTXInst<(outs), (ins i32imm:$i, Int32Regs:$j), asmstr,
+ def _ir : BasicNVPTXInst<(outs), (ins i32imm:$i, B32:$j), asmstr,
[(intrinsic imm:$i, i32:$j)]>,
Requires<requires>;
@@ -170,26 +170,26 @@ foreach sync = [false, true] in {
# "_" # regclass
# !if(return_pred, "p", ""));
defvar InOperandList = !con(
- (ins Int32Regs:$src),
- !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
- !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]),
+ (ins B32:$src),
+ !dag(ins, !if(offset_imm, [i32imm], [B32]), ["offset"]),
+ !dag(ins, !if(mask_imm, [i32imm], [B32]), ["mask"]),
!if(sync,
- !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
+ !dag(ins, !if(threadmask_imm, [i32imm], [B32]), ["threadmask"]),
(ins)));
defvar Pattern = !con(
- (set Int32Regs:$dst),
- !if(return_pred, (set Int1Regs:$pred), (set)),
+ (set B32:$dst),
+ !if(return_pred, (set B1:$pred), (set)),
(set !con(
!if(sync,
- !dag(Intr, !if(threadmask_imm, [imm], [Int32Regs]), ["threadmask"]),
+ !dag(Intr, !if(threadmask_imm, [imm], [B32]), ["threadmask"]),
(Intr)),
- (Intr Int32Regs:$src),
- !dag(Intr, !if(offset_imm, [imm], [Int32Regs]), ["offset"]),
- !dag(Intr, !if(mask_imm, [imm], [Int32Regs]), ["mask"]))));
+ (Intr B32:$src),
+ !dag(Intr, !if(offset_imm, [imm], [B32]), ["offset"]),
+ !dag(Intr, !if(mask_imm, [imm], [B32]), ["mask"]))));
def : BasicNVPTXInst<
- !if(return_pred, (outs Int32Regs:$dst, Int1Regs:$pred),
- (outs Int32Regs:$dst)),
+ !if(return_pred, (outs B32:$dst, B1:$pred),
+ (outs B32:$dst)),
InOperandList,
"shfl." # !if(sync, "sync.", "") # mode # ".b32",
[Pattern]>,
@@ -204,107 +204,107 @@ foreach sync = [false, true] in {
// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
- def : BasicNVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
+ def : BasicNVPTXInst<(outs regclass:$dest), (ins B1:$pred),
"vote." # mode,
[(set regclass:$dest, (IntOp i1:$pred))]>,
Requires<[hasPTX<60>, hasSM<30>]>;
}
-defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
-defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
-defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
-defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
+defm VOTE_ALL : VOTE<B1, "all.pred", int_nvvm_vote_all>;
+defm VOTE_ANY : VOTE<B1, "any.pred", int_nvvm_vote_any>;
+defm VOTE_UNI : VOTE<B1, "uni.pred", int_nvvm_vote_uni>;
+defm VOTE_BALLOT : VOTE<B32, "ballot.b32", int_nvvm_vote_ballot>;
// vote.sync.{all,any,uni,ballot}
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
- def i : BasicNVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred, i32imm:$mask),
+ def i : BasicNVPTXInst<(outs regclass:$dest), (ins B1:$pred, i32imm:$mask),
"vote.sync." # mode,
[(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>,
Requires<[hasPTX<60>, hasSM<30>]>;
- def r : BasicNVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred, Int32Regs:$mask),
+ def r : BasicNVPTXInst<(outs regclass:$dest), (ins B1:$pred, B32:$mask),
"vote.sync." # mode,
[(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>,
Requires<[hasPTX<60>, hasSM<30>]>;
}
-defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
-defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
-defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
-defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
+defm VOTE_SYNC_ALL : VOTE_SYNC<B1, "all.pred", int_nvvm_vote_all_sync>;
+defm VOTE_SYNC_ANY : VOTE_SYNC<B1, "any.pred", int_nvvm_vote_any_sync>;
+defm VOTE_SYNC_UNI : VOTE_SYNC<B1, "uni.pred", int_nvvm_vote_uni_sync>;
+defm VOTE_SYNC_BALLOT : VOTE_SYNC<B32, "ballot.b32", int_nvvm_vote_ballot_sync>;
// elect.sync
-def INT_ELECT_SYNC_I : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask),
+def INT_ELECT_SYNC_I : BasicNVPTXInst<(outs B32:$dest, B1:$pred), (ins i32imm:$mask),
"elect.sync",
[(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>,
Requires<[hasPTX<80>, hasSM<90>]>;
-def INT_ELECT_SYNC_R : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask),
+def INT_ELECT_SYNC_R : BasicNVPTXInst<(outs B32:$dest, B1:$pred), (ins B32:$mask),
"elect.sync",
[(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>,
Requires<[hasPTX<80>, hasSM<90>]>;
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
Operand ImmOp> {
- def ii : BasicNVPTXInst<(outs Int32Regs:$dest), (ins ImmOp:$value, i32imm:$mask),
+ def ii : BasicNVPTXInst<(outs B32:$dest), (ins ImmOp:$value, i32imm:$mask),
"match.any.sync." # ptxtype,
[(set i32:$dest, (IntOp imm:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
- def ir : BasicNVPTXInst<(outs Int32Regs:$dest), (ins ImmOp:$value, Int32Regs:$mask),
+ def ir : BasicNVPTXInst<(outs B32:$dest), (ins ImmOp:$value, B32:$mask),
"match.any.sync." # ptxtype,
[(set i32:$dest, (IntOp i32:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
- def ri : BasicNVPTXInst<(outs Int32Regs:$dest), (ins regclass:$value, i32imm:$mask),
+ def ri : BasicNVPTXInst<(outs B32:$dest), (ins regclass:$value, i32imm:$mask),
"match.any.sync." # ptxtype,
[(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
- def rr : BasicNVPTXInst<(outs Int32Regs:$dest), (ins regclass:$value, Int32Regs:$mask),
+ def rr : BasicNVPTXInst<(outs B32:$dest), (ins regclass:$value, B32:$mask),
"match.any.sync." # ptxtype,
[(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
}
// activemask.b32
-def ACTIVEMASK : BasicNVPTXInst<(outs Int32Regs:$dest), (ins),
+def ACTIVEMASK : BasicNVPTXInst<(outs B32:$dest), (ins),
"activemask.b32",
[(set i32:$dest, (int_nvvm_activemask))]>,
Requires<[hasPTX<62>, hasSM<30>]>;
-defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
+defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<B32, "b32", int_nvvm_match_any_sync_i32,
i32imm>;
-defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
+defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<B64, "b64", int_nvvm_match_any_sync_i64,
i64imm>;
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
Operand ImmOp> {
- def ii : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
+ def ii : BasicNVPTXInst<(outs B32:$dest, B1:$pred),
(ins ImmOp:$value, i32imm:$mask),
"match.all.sync." # ptxtype,
[(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
- def ir : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
- (ins ImmOp:$value, Int32Regs:$mask),
+ def ir : BasicNVPTXInst<(outs B32:$dest, B1:$pred),
+ (ins ImmOp:$value, B32:$mask),
"match.all.sync." # ptxtype,
[(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
- def ri : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
+ def ri : BasicNVPTXInst<(outs B32:$dest, B1:$pred),
(ins regclass:$value, i32imm:$mask),
"match.all.sync." # ptxtype,
[(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
- def rr : BasicNVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
- (ins regclass:$value, Int32Regs:$mask),
+ def rr : BasicNVPTXInst<(outs B32:$dest, B1:$pred),
+ (ins regclass:$value, B32:$mask),
"match.all.sync." # ptxtype,
[(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>,
Requires<[hasPTX<60>, hasSM<70>]>;
}
-defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
+defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<B32, "b32", int_nvvm_match_all_sync_i32p,
i32imm>;
-defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
+defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<B64, "b64", int_nvvm_match_all_sync_i64p,
i64imm>;
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
- def : BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
+ def : BasicNVPTXInst<(outs B32:$dst), (ins B32:$src, B32:$mask),
"redux.sync." # BinOp # "." # PTXType,
- [(set i32:$dst, (Intrin i32:$src, Int32Regs:$mask))]>,
+ [(set i32:$dst, (Intrin i32:$src, B32:$mask))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
}
@@ -320,10 +320,10 @@ defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
multiclass REDUX_SYNC_F<string BinOp, string abs, string NaN> {
defvar intr_name = "int_nvvm_redux_sync_f" # BinOp # !subst(".", "_", abs) # !subst(".", "_", NaN);
- def : BasicNVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$src, Int32Regs:$mask),
+ def : BasicNVPTXInst<(outs B32:$dst),
+ (ins B32:$src, B32:$mask),
"redux.sync." # BinOp # abs # NaN # ".f32",
- [(set f32:$dst, (!cast<Intrinsic>(intr_name) f32:$src, Int32Regs:$mask))]>,
+ [(set f32:$dst, (!cast<Intrinsic>(intr_name) f32:$src, B32:$mask))]>,
Requires<[hasPTX<86>, hasSM100a]>;
}
@@ -377,7 +377,7 @@ def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS:
// fence.proxy.tensormap.acquire variants
class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<string Scope, Intrinsic Intr> :
- NVPTXInst<(outs), (ins Int64Regs:$addr),
+ NVPTXInst<(outs), (ins B64:$addr),
"fence.proxy.tensormap::generic.acquire." # Scope # " [$addr], 128;",
[(Intr i64:$addr, (i32 128))]>,
Requires<[hasPTX<83>, hasSM<90>]>;
@@ -422,7 +422,7 @@ multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin,
Requires<[hasPTX<70>, hasSM<80>]>;
// Variant with src_size parameter
- def _s : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$src_size),
+ def _s : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, B32:$src_size),
"cp.async." # cc # ".shared.global" # " [$dst], [$src], " # cpsize # ", $src_size;",
[(IntrinS addr:$dst, addr:$src, i32:$src_size)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -498,14 +498,14 @@ class CpAsyncBulkStr<bit mc, bit ch, bit mask = 0> {
}
multiclass CP_ASYNC_BULK_S2G_INTR<bit has_ch> {
- def "" : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$size, Int64Regs:$ch),
+ def "" : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, B32:$size, B64:$ch),
!if(has_ch,
CpAsyncBulkStr<0, 1>.S2G # " [$dst], [$src], $size, $ch;",
CpAsyncBulkStr<0, 0>.S2G # " [$dst], [$src], $size;"),
[(int_nvvm_cp_async_bulk_shared_cta_to_global addr:$dst, addr:$src, i32:$size, i64:$ch, !if(has_ch, -1, 0))]>,
Requires<[hasPTX<80>, hasSM<90>]>;
- def _BM : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$size, Int64Regs:$ch, Int16Regs:$mask),
+ def _BM : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, B32:$size, B64:$ch, B16:$mask),
!if(has_ch,
CpAsyncBulkStr<0, 1, 1>.S2G # " [$dst], [$src], $size, $ch, $mask;",
CpAsyncBulkStr<0, 0, 1>.S2G # " [$dst], [$src], $size, $mask;"),
@@ -520,7 +520,7 @@ multiclass CP_ASYNC_BULK_G2S_INTR<bit has_ch> {
def "" : NVPTXInst<(outs),
(ins ADDR:$dst, ADDR:$mbar, ADDR:$src,
- Int32Regs:$size, Int16Regs:$mask, Int64Regs:$ch),
+ B32:$size, B16:$mask, B64:$ch),
!if(has_ch,
CpAsyncBulkStr<0, 1>.G2S # " [$dst], [$src], $size, [$mbar], $ch;",
CpAsyncBulkStr<0, 0>.G2S # " [$dst], [$src], $size, [$mbar];"),
@@ -529,7 +529,7 @@ multiclass CP_ASYNC_BULK_G2S_INTR<bit has_ch> {
def _MC : NVPTXInst<(outs),
(ins ADDR:$dst, ADDR:$mbar, ADDR:$src,
- Int32Regs:$size, Int16Regs:$mask, Int64Regs:$ch),
+ B32:$size, B16:$mask, B64:$ch),
!if(has_ch,
CpAsyncBulkStr<1, 1>.G2S # " [$dst], [$src], $size, [$mbar], $mask, $ch;",
CpAsyncBulkStr<1, 0>.G2S # " [$dst], [$src], $size, [$mbar], $mask;"),
@@ -540,13 +540,13 @@ defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S_INTR<has_ch = 0>;
defm CP_ASYNC_BULK_G2S_CH : CP_ASYNC_BULK_G2S_INTR<has_ch = 1>;
def CP_ASYNC_BULK_CTA_TO_CLUSTER : NVPTXInst<(outs),
- (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, Int32Regs:$size),
+ (ins ADDR:$dst, ADDR:$mbar, ADDR:$src, B32:$size),
CpAsyncBulkStr<0, 0>.C2C # " [$dst], [$src], $size, [$mbar];",
[(int_nvvm_cp_async_bulk_shared_cta_to_cluster addr:$dst, addr:$mbar, addr:$src, i32:$size)]>,
Requires<[hasPTX<80>, hasSM<90>]>;
multiclass CP_ASYNC_BULK_PREFETCH_INTR<bit has_ch> {
- def "" : NVPTXInst<(outs), (ins ADDR:$src, Int32Regs:$size, Int64Regs:$ch),
+ def "" : NVPTXInst<(outs), (ins ADDR:$src, B32:$size, B64:$ch),
!if(has_ch,
"cp.async.bulk.prefetch.L2.global.L2::cache_hint" # " [$src], $size, $ch;",
"cp.async.bulk.prefetch.L2.global" # " [$src], $size;"),
@@ -583,14 +583,14 @@ def CTAGroupFlags : Operand<i32> {
}
multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode> {
- defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i));
defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
defvar asm_str_default = "$cg [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]";
- defvar rc = !if(is_shared32, Int32Regs, Int64Regs);
+ defvar rc = !if(is_shared32, B32, B64);
defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0);
defvar im2col_dag = !if(!eq(mode, "im2col"),
- !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
+ !dag(ins, !listsplat(B16, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
(ins));
defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", ");
defvar im2col_asm_str = ", {{" # im2col_str # "}}";
@@ -599,22 +599,22 @@ multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode>
!strconcat(asm_str_default, im2col_asm_str), asm_str_default);
def "" : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins CTAGroupFlags:$cg)),
+ !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag, (ins CTAGroupFlags:$cg)),
!strconcat(G2S_STRINGS<dim, mode, 0, 0>.inst_name, asm_str, ";"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
def _MC : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag,
- (ins Int16Regs:$mc, CTAGroupFlags:$cg)),
+ !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag,
+ (ins B16:$mc, CTAGroupFlags:$cg)),
!strconcat(G2S_STRINGS<dim, mode, 1, 0>.inst_name, asm_str, ", $mc;"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
def _CH : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag,
- (ins Int64Regs:$ch, CTAGroupFlags:$cg)),
+ !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag,
+ (ins B64:$ch, CTAGroupFlags:$cg)),
!strconcat(G2S_STRINGS<dim, mode, 0, 1>.inst_name, asm_str, ", $ch;"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
def _MC_CH : NVPTXInst<(outs),
- !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag,
- (ins Int16Regs:$mc, Int64Regs:$ch, CTAGroupFlags:$cg)),
+ !con((ins rc:$dst, rc:$mbar, B64:$tmap), dims_dag, im2col_dag,
+ (ins B16:$mc, B64:$ch, CTAGroupFlags:$cg)),
!strconcat(G2S_STRINGS<dim, mode, 1, 1>.inst_name, asm_str, ", $mc, $ch;"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
}
@@ -648,17 +648,17 @@ class S2G_STRINGS<int dim, string mode, bit ch,
}
multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, bit shared32, string mode> {
- defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i));
defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]";
- defvar rc = !if(shared32, Int32Regs, Int64Regs);
+ defvar rc = !if(shared32, B32, B64);
def "" : NVPTXInst<(outs),
- !con((ins rc:$src, Int64Regs:$tmap), dims_dag),
+ !con((ins rc:$src, B64:$tmap), dims_dag),
!strconcat(S2G_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
def _CH : NVPTXInst<(outs),
- !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch)),
+ !con((ins rc:$src, B64:$tmap), dims_dag, (ins B64:$ch)),
!strconcat(S2G_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
}
@@ -669,20 +669,20 @@ def TMAReductionFlags : Operand<i32> {
// TMA Copy from Shared to Global memory with Reduction
multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, bit shared32, string mode> {
- defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i));
defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]";
- defvar rc = !if(shared32, Int32Regs, Int64Regs);
+ defvar rc = !if(shared32, B32, B64);
defvar prefix = "cp.reduce.async.bulk.tensor" # "." # dim # "d" # ".global.shared::cta";
defvar suffix = "." # mode # ".bulk_group";
def "" : NVPTXInst<(outs),
- !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)),
+ !con((ins rc:$src, B64:$tmap), dims_dag, (ins TMAReductionFlags:$red_op)),
!strconcat(prefix, "${red_op}", suffix, asm_str, ";"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
def _CH : NVPTXInst<(outs),
- !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch, TMAReductionFlags:$red_op)),
+ !con((ins rc:$src, B64:$tmap), dims_dag, (ins B64:$ch, TMAReductionFlags:$red_op)),
!strconcat(prefix, "${red_op}", suffix, ".L2::cache_hint", asm_str, ", $ch;"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
}
@@ -713,13 +713,13 @@ class PREFETCH_STRINGS<int dim, string mode, bit ch> {
}
multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
- defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i));
+ defvar dims_dag = !dag(ins, !listsplat(B32, dim), !foreach(i, !range(dim), "d" # i));
defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
defvar asm_str_default = " [$tmap, {{" # dims_str # "}}]";
defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0);
defvar im2col_dag = !if(!eq(mode, "im2col"),
- !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
+ !dag(ins, !listsplat(B16, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)),
(ins));
defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", ");
defvar im2col_asm_str = ", {{" # im2col_str # "}}";
@@ -728,11 +728,11 @@ multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
!strconcat(asm_str_default, im2col_asm_str), asm_str_default);
def "" : NVPTXInst<(outs),
- !con((ins Int64Regs:$tmap), dims_dag, im2col_dag),
+ !con((ins B64:$tmap), dims_dag, im2col_dag),
!strconcat(PREFETCH_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
def _CH : NVPTXInst<(outs),
- !con((ins Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)),
+ !con((ins B64:$tmap), dims_dag, im2col_dag, (ins B64:$ch)),
!strconcat(PREFETCH_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>,
Requires<[hasPTX<80>, hasSM<90>]>;
}
@@ -776,7 +776,7 @@ def PREFETCHU_L1 : PREFETCH_INTRS<"prefetchu.L1">;
//Applypriority intrinsics
class APPLYPRIORITY_L2_INTRS<string addrspace> :
- BasicNVPTXInst<(outs), (ins ADDR:$addr, Int64Regs:$size),
+ BasicNVPTXInst<(outs), (ins ADDR:$addr, B64:$size),
StrJoin<".", ["applypriority", addrspace , "L2::evict_normal"]>.ret,
[(!cast<Intrinsic>(StrJoin<"_", ["int_nvvm_applypriority", addrspace , "L2_evict_normal"]>.ret)
addr:$addr, i64:$size)]>,
@@ -804,7 +804,7 @@ def DISCARD_GLOBAL_L2 : DISCARD_L2_INTRS<"global">;
//-----------------------------------
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
- def "" : BasicNVPTXInst<(outs), (ins ADDR:$addr, Int32Regs:$count),
+ def "" : BasicNVPTXInst<(outs), (ins ADDR:$addr, B32:$count),
"mbarrier.init" # AddrSpace # ".b64",
[(Intrin addr:$addr, i32:$count)]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -826,7 +826,7 @@ defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
int_nvvm_mbarrier_inval_shared>;
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
- def "" : BasicNVPTXInst<(outs Int64Regs:$state), (ins ADDR:$addr),
+ def "" : BasicNVPTXInst<(outs B64:$state), (ins ADDR:$addr),
"mbarrier.arrive" # AddrSpace # ".b64",
[(set i64:$state, (Intrin addr:$addr))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -837,8 +837,8 @@ defm MBARRIER_ARRIVE_SHARED :
MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
- def "" : BasicNVPTXInst<(outs Int64Regs:$state),
- (ins ADDR:$addr, Int32Regs:$count),
+ def "" : BasicNVPTXInst<(outs B64:$state),
+ (ins ADDR:$addr, B32:$count),
"mbarrier.arrive.noComplete" # AddrSpace # ".b64",
[(set i64:$state, (Intrin addr:$addr, i32:$count))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -850,7 +850,7 @@ defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
- def "" : BasicNVPTXInst<(outs Int64Regs:$state), (ins ADDR:$addr),
+ def "" : BasicNVPTXInst<(outs B64:$state), (ins ADDR:$addr),
"mbarrier.arrive_drop" # AddrSpace # ".b64",
[(set i64:$state, (Intrin addr:$addr))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -862,8 +862,8 @@ defm MBARRIER_ARRIVE_DROP_SHARED :
MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
- def "" : BasicNVPTXInst<(outs Int64Regs:$state),
- (ins ADDR:$addr, Int32Regs:$count),
+ def "" : BasicNVPTXInst<(outs B64:$state),
+ (ins ADDR:$addr, B32:$count),
"mbarrier.arrive_drop.noComplete" # AddrSpace # ".b64",
[(set i64:$state, (Intrin addr:$addr, i32:$count))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -876,7 +876,7 @@ defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
- def "" : BasicNVPTXInst<(outs Int1Regs:$res), (ins ADDR:$addr, Int64Regs:$state),
+ def "" : BasicNVPTXInst<(outs B1:$res), (ins ADDR:$addr, B64:$state),
"mbarrier.test_wait" # AddrSpace # ".b64",
[(set i1:$res, (Intrin addr:$addr, i64:$state))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -888,7 +888,7 @@ defm MBARRIER_TEST_WAIT_SHARED :
MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
- BasicNVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
+ BasicNVPTXInst<(outs B32:$res), (ins B64:$state),
"mbarrier.pending_count.b64",
[(set i32:$res, (Intrin i64:$state))]>,
Requires<[hasPTX<70>, hasSM<80>]>;
@@ -989,7 +989,7 @@ def : PRMT2Pat<int_nvvm_prmt_rc16, PrmtRC16>;
def INT_NVVM_NANOSLEEP_I : BasicNVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32",
[(int_nvvm_nanosleep imm:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;
-def INT_NVVM_NANOSLEEP_R : BasicNVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32",
+def INT_NVVM_NANOSLEEP_R : BasicNVPTXInst<(outs), (ins B32:$i), "nanosleep.u32",
[(int_nvvm_nanosleep i32:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;
@@ -1006,64 +1006,46 @@ def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs),
// Min Max
//
-def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", Float32Regs,
- Float32Regs, Float32Regs, int_nvvm_fmin_f>;
-def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
-def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
+def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", B32, B32, B32, int_nvvm_fmin_f>;
+def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32", B32, B32, B32, int_nvvm_fmin_ftz_f>;
+def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32", B32, B32, B32, int_nvvm_fmin_nan_f,
[hasPTX<70>, hasSM<80>]>;
-def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
+def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_f,
[hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F :
- F_MATH_2<"min.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
+ F_MATH_2<"min.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
- F_MATH_2<"min.ftz.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
+ F_MATH_2<"min.ftz.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
- F_MATH_2<"min.NaN.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
+ F_MATH_2<"min.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_nan_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
- F_MATH_2<"min.ftz.NaN.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
+ F_MATH_2<"min.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
-def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", Float32Regs,
- Float32Regs, Float32Regs, int_nvvm_fmax_f>;
-def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
-def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
+def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", B32, B32, B32, int_nvvm_fmax_f>;
+def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32", B32, B32, B32, int_nvvm_fmax_ftz_f>;
+def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32", B32, B32, B32, int_nvvm_fmax_nan_f,
[hasPTX<70>, hasSM<80>]>;
-def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
+def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_f,
[hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_XORSIGN_ABS_F :
- F_MATH_2<"max.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
+ F_MATH_2<"max.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
- F_MATH_2<"max.ftz.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
+ F_MATH_2<"max.ftz.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
- F_MATH_2<"max.NaN.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
+ F_MATH_2<"max.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_nan_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
- F_MATH_2<"max.ftz.NaN.xorsign.abs.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
+ F_MATH_2<"max.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
-def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", Float64Regs,
- Float64Regs, Float64Regs, int_nvvm_fmin_d>;
-def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", Float64Regs,
- Float64Regs, Float64Regs, int_nvvm_fmax_d>;
+def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", B64, B64, B64, int_nvvm_fmin_d>;
+def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", B64, B64, B64, int_nvvm_fmax_d>;
//
// Min Max f16, f16x2, bf16, bf16x2
@@ -1080,67 +1062,67 @@ class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
multiclass MIN_MAX<string IntName> {
foreach P = [
MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
- int_nvvm_fmax_f16), Int16Regs>,
+ int_nvvm_fmax_f16), B16>,
MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
- int_nvvm_fmax_ftz_f16), Int16Regs>,
+ int_nvvm_fmax_ftz_f16), B16>,
MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
- int_nvvm_fmax_nan_f16), Int16Regs>,
+ int_nvvm_fmax_nan_f16), B16>,
MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
+ int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), B16>,
MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
- Int16Regs, [hasPTX<72>, hasSM<86>]>,
+ B16, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
- Int16Regs, [hasPTX<72>, hasSM<86>]>,
+ B16, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
- Int16Regs, [hasPTX<72>, hasSM<86>]>,
+ B16, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
- int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
+ int_nvvm_fmax_ftz_nan_xorsign_abs_f16), B16, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
- int_nvvm_fmax_f16x2), Int32Regs>,
+ int_nvvm_fmax_f16x2), B32>,
MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
+ int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), B32>,
MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
+ int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), B32>,
MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
+ int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), B32>,
MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
- Int32Regs, [hasPTX<72>, hasSM<86>]>,
+ B32, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
- Int32Regs, [hasPTX<72>, hasSM<86>]>,
+ B32, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
- Int32Regs, [hasPTX<72>, hasSM<86>]>,
+ B32, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
- Int32Regs, [hasPTX<72>, hasSM<86>]>,
+ B32, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
- int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
+ int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), B16>,
MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
- int_nvvm_fmax_nan_bf16), Int16Regs>,
+ int_nvvm_fmax_nan_bf16), B16>,
MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
- Int16Regs, [hasPTX<72>, hasSM<86>]>,
+ B16, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
- Int16Regs, [hasPTX<72>, hasSM<86>]>,
+ B16, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
- int_nvvm_fmax_bf16x2), Int32Regs>,
+ int_nvvm_fmax_bf16x2), B32>,
MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
+ int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), B32>,
MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
- Int32Regs, [hasPTX<72>, hasSM<86>]>,
+ B32, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_bf16x2,
int_nvvm_fmax_nan_xorsign_abs_bf16x2),
- Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
+ B32, [hasPTX<72>, hasSM<86>]>] in {
def P.Variant : F_MATH_2<!strconcat(
IntName, !subst("_", ".", P.Variant)),
P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
@@ -1154,85 +1136,50 @@ defm INT_NVVM_FMAN : MIN_MAX<"max">;
// Multiplication
//
-def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16", Int16Regs,
- Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
-def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16", Int16Regs,
- Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
-def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
-def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
-def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
-def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
-
-def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
-def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
-def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
-def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
-def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
-def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
-def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
-def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
-
-def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
-def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
-def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
-def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
-
-def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32",
- Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
-def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32",
- Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
+def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16", B16, B16, B16, int_nvvm_mulhi_s>;
+def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16", B16, B16, B16, int_nvvm_mulhi_us>;
+def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32", B32, B32, B32, int_nvvm_mulhi_i>;
+def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32", B32, B32, B32, int_nvvm_mulhi_ui>;
+def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64", B64, B64, B64, int_nvvm_mulhi_ll>;
+def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64", B64, B64, B64, int_nvvm_mulhi_ull>;
+
+def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32", B32, B32, B32, int_nvvm_mul_rn_ftz_f>;
+def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32", B32, B32, B32, int_nvvm_mul_rn_f>;
+def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32", B32, B32, B32, int_nvvm_mul_rz_ftz_f>;
+def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32", B32, B32, B32, int_nvvm_mul_rz_f>;
+def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32", B32, B32, B32, int_nvvm_mul_rm_ftz_f>;
+def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32", B32, B32, B32, int_nvvm_mul_rm_f>;
+def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32", B32, B32, B32, int_nvvm_mul_rp_ftz_f>;
+def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32", B32, B32, B32, int_nvvm_mul_rp_f>;
+
+def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64", B64, B64, B64, int_nvvm_mul_rn_d>;
+def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64", B64, B64, B64, int_nvvm_mul_rz_d>;
+def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64", B64, B64, B64, int_nvvm_mul_rm_d>;
+def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64", B64, B64, B64, int_nvvm_mul_rp_d>;
+
+def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32", B32, B32, B32, int_nvvm_mul24_i>;
+def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32", B32, B32, B32, int_nvvm_mul24_ui>;
//
// Div
//
-def INT_NVVM_DIV_APPROX_FTZ_F
- : F_MATH_2<"div.approx.ftz.f32", Float32Regs,
- Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
-def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
-
-def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
-def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
-def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
-def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
-def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
-def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
-def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
-def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
-
-def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
-def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
-def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
-def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
+def INT_NVVM_DIV_APPROX_FTZ_F : F_MATH_2<"div.approx.ftz.f32", B32, B32, B32, int_nvvm_div_approx_ftz_f>;
+def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32", B32, B32, B32, int_nvvm_div_approx_f>;
+
+def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32", B32, B32, B32, int_nvvm_div_rn_ftz_f>;
+def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32", B32, B32, B32, int_nvvm_div_rn_f>;
+def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32", B32, B32, B32, int_nvvm_div_rz_ftz_f>;
+def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32", B32, B32, B32, int_nvvm_div_rz_f>;
+def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32", B32, B32, B32, int_nvvm_div_rm_ftz_f>;
+def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32", B32, B32, B32, int_nvvm_div_rm_f>;
+def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32", B32, B32, B32, int_nvvm_div_rp_ftz_f>;
+def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32", B32, B32, B32, int_nvvm_div_rp_f>;
+
+def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64", B64, B64, B64, int_nvvm_div_rn_d>;
+def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64", B64, B64, B64, int_nvvm_div_rz_d>;
+def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64", B64, B64, B64, int_nvvm_div_rm_d>;
+def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64", B64, B64, B64, int_nvvm_div_rp_d>;
def : Pat<(int_nvvm_div_full f32:$a, f32:$b),
(FDIV32rr $a, $b)>;
@@ -1250,18 +1197,12 @@ def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
// Sad
//
-def INT_NVVM_SAD_S : F_MATH_3<"sad.s16",
- Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
-def INT_NVVM_SAD_US : F_MATH_3<"sad.u16",
- Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
-def INT_NVVM_SAD_I : F_MATH_3<"sad.s32",
- Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
-def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32",
- Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
-def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64",
- Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
-def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64",
- Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
+def INT_NVVM_SAD_S : F_MATH_3<"sad.s16", B16, B16, B16, B16, int_nvvm_sad_s>;
+def INT_NVVM_SAD_US : F_MATH_3<"sad.u16", B16, B16, B16, B16, int_nvvm_sad_us>;
+def INT_NVVM_SAD_I : F_MATH_3<"sad.s32", B32, B32, B32, B32, int_nvvm_sad_i>;
+def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32", B32, B32, B32, B32, int_nvvm_sad_ui>;
+def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64", B64, B64, B64, B64, int_nvvm_sad_ll>;
+def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64", B64, B64, B64, B64, int_nvvm_sad_ull>;
//
// Floor Ceil
@@ -1301,12 +1242,12 @@ defm ABS_F64 : F_ABS<"f64", F64RT, support_ftz = false>;
def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>;
def COPYSIGN_F :
- BasicNVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1),
+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$src0, B32:$src1),
"copysign.f32",
[(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>;
def COPYSIGN_D :
- BasicNVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1),
+ BasicNVPTXInst<(outs B64:$dst), (ins B64:$src0, B64:$src1),
"copysign.f64",
[(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>;
@@ -1404,59 +1345,59 @@ class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
multiclass FMA_INST {
foreach P = [
- FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
- FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
- FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
- FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
-
- FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
- FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
- FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
- FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
- FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
- FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
- FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
- FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
-
- FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
+ FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, B64>,
+ FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, B64>,
+ FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, B64>,
+ FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, B64>,
+
+ FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, B32>,
+ FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, B32>,
+ FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, B32>,
+ FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, B32>,
+ FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, B32>,
+ FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, B32>,
+ FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, B32>,
+ FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, B32>,
+
+ FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, B16, [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, B16,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
+ FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, B16,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
+ FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, B16,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
+ FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
+ FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
+ FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, B16, [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
+ FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
+ FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
+ FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
+ FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, B16,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
+ FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, B32,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
+ FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, B32,
[hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
+ FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, B32,
[hasPTX<42>, hasSM<53>]>,
FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
- Int32Regs, [hasPTX<42>, hasSM<53>]>,
- FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
+ B32, [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, B32,
[hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
- Int32Regs, [hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
+ B32, [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, B32,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
+ FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, B32,
[hasPTX<70>, hasSM<80>]>
] in {
def P.Variant :
@@ -1566,31 +1507,19 @@ def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
// Add
//
-def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
-def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
-def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
-def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
-def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
-def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
-def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
-def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32",
- Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
-
-def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
-def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
-def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
-def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64",
- Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
+def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32", B32, B32, B32, int_nvvm_add_rn_ftz_f>;
+def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32", B32, B32, B32, int_nvvm_add_rn_f>;
+def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32", B32, B32, B32, int_nvvm_add_rz_ftz_f>;
+def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32", B32, B32, B32, int_nvvm_add_rz_f>;
+def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32", B32, B32, B32, int_nvvm_add_rm_ftz_f>;
+def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32", B32, B32, B32, int_nvvm_add_rm_f>;
+def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32", B32, B32, B32, int_nvvm_add_rp_ftz_f>;
+def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32", B32, B32, B32, int_nvvm_add_rp_f>;
+
+def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64", B64, B64, B64, int_nvvm_add_rn_d>;
+def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64", B64, B64, B64, int_nvvm_add_rz_d>;
+def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64", B64, B64, B64, int_nvvm_add_rm_d>;
+def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64", B64, B64, B64, int_nvvm_add_rp_d>;
//
// BFIND
@@ -1600,12 +1529,12 @@ foreach t = [I32RT, I64RT] in {
foreach sign = ["s", "u"] in {
defvar flo_intrin = !cast<Intrinsic>("int_nvvm_flo_" # sign);
def BFIND_ # sign # t.Size
- : BasicNVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src),
+ : BasicNVPTXInst<(outs B32:$dst), (ins t.RC:$src),
"bfind." # sign # t.Size,
[(set i32:$dst, (flo_intrin t.Ty:$src, 0))]>;
def BFIND_SHIFTAMT_ # sign # t.Size
- : BasicNVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src),
+ : BasicNVPTXInst<(outs B32:$dst), (ins t.RC:$src),
"bfind.shiftamt." # sign # t.Size,
[(set i32:$dst, (flo_intrin t.Ty:$src, -1))]>;
}
@@ -1856,24 +1785,24 @@ let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
//
class INT_FNS_MBO<dag ins, dag Operands>
- : BasicNVPTXInst<(outs Int32Regs:$dst), ins,
+ : BasicNVPTXInst<(outs B32:$dst), ins,
"fns.b32",
[(set i32:$dst, Operands)]>,
Requires<[hasPTX<60>, hasSM<30>]>;
-def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
+def INT_FNS_rrr : INT_FNS_MBO<(ins B32:$mask, B32:$base, B32:$offset),
(int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>;
-def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
+def INT_FNS_rri : INT_FNS_MBO<(ins B32:$mask, B32:$base, i32imm:$offset),
(int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>;
-def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
+def INT_FNS_rir : INT_FNS_MBO<(ins B32:$mask, i32imm:$base, B32:$offset),
(int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>;
-def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
+def INT_FNS_rii : INT_FNS_MBO<(ins B32:$mask, i32imm:$base, i32imm:$offset),
(int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>;
-def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
+def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, B32:$base, B32:$offset),
(int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>;
-def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
+def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, B32:$base, i32imm:$offset),
(int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>;
-def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
+def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, B32:$offset),
(int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
(int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
@@ -2145,10 +2074,10 @@ class LDU_G<string TyStr, NVPTXRegClass regclass>
: NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
"ldu.global." # TyStr # " \t$result, [$src];", []>;
-def LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
-def LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
-def LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
-def LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
+def LDU_GLOBAL_i8 : LDU_G<"b8", B16>;
+def LDU_GLOBAL_i16 : LDU_G<"b16", B16>;
+def LDU_GLOBAL_i32 : LDU_G<"b32", B32>;
+def LDU_GLOBAL_i64 : LDU_G<"b64", B64>;
// vector
@@ -2165,14 +2094,14 @@ class VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass>
"ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
-def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", Int16Regs>;
-def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", Int16Regs>;
-def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", Int32Regs>;
-def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", Int64Regs>;
+def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", B16>;
+def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", B16>;
+def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", B32>;
+def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", B64>;
-def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", Int16Regs>;
-def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", Int16Regs>;
-def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", Int32Regs>;
+def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", B16>;
+def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", B16>;
+def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", B32>;
//-----------------------------------
@@ -2187,10 +2116,10 @@ class LDG_G<NVPTXRegClass regclass>
: NVPTXInst<(outs regclass:$result), (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
"ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];", []>;
-def LD_GLOBAL_NC_i8 : LDG_G<Int16Regs>;
-def LD_GLOBAL_NC_i16 : LDG_G<Int16Regs>;
-def LD_GLOBAL_NC_i32 : LDG_G<Int32Regs>;
-def LD_GLOBAL_NC_i64 : LDG_G<Int64Regs>;
+def LD_GLOBAL_NC_i8 : LDG_G<B16>;
+def LD_GLOBAL_NC_i16 : LDG_G<B16>;
+def LD_GLOBAL_NC_i32 : LDG_G<B32>;
+def LD_GLOBAL_NC_i64 : LDG_G<B64>;
// vector
@@ -2213,33 +2142,33 @@ class VLDG_G_ELE_V8<NVPTXRegClass regclass> :
"ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
-def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2<Int16Regs>;
-def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2<Int16Regs>;
-def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2<Int32Regs>;
-def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2<Int64Regs>;
+def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2<B16>;
+def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2<B16>;
+def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2<B32>;
+def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2<B64>;
-def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4<Int16Regs>;
-def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4<Int16Regs>;
-def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4<Int32Regs>;
+def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4<B16>;
+def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4<B16>;
+def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4<B32>;
-def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4<Int64Regs>;
-def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8<Int32Regs>;
+def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4<B64>;
+def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8<B32>;
multiclass NG_TO_G<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
if Supports32 then
- def "" : BasicNVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ def "" : BasicNVPTXInst<(outs B32:$result), (ins B32:$src),
"cvta." # Str # ".u32", []>, Requires<Preds>;
- def _64 : BasicNVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ def _64 : BasicNVPTXInst<(outs B64:$result), (ins B64:$src),
"cvta." # Str # ".u64", []>, Requires<Preds>;
}
multiclass G_TO_NG<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
if Supports32 then
- def "" : BasicNVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
+ def "" : BasicNVPTXInst<(outs B32:$result), (ins B32:$src),
"cvta.to." # Str # ".u32", []>, Requires<Preds>;
- def _64 : BasicNVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
+ def _64 : BasicNVPTXInst<(outs B64:$result), (ins B64:$src),
"cvta.to." # Str # ".u64", []>, Requires<Preds>;
}
@@ -2253,64 +2182,64 @@ defm cvta_to_shared_cluster : G_TO_NG<"shared::cluster", false, [hasClusters]>;
// nvvm.move intrinsics
-def nvvm_move_i16 : BasicNVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
+def nvvm_move_i16 : BasicNVPTXInst<(outs B16:$r), (ins B16:$s),
"mov.b16",
[(set i16:$r,
(int_nvvm_move_i16 i16:$s))]>;
-def nvvm_move_i32 : BasicNVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
+def nvvm_move_i32 : BasicNVPTXInst<(outs B32:$r), (ins B32:$s),
"mov.b32",
[(set i32:$r,
(int_nvvm_move_i32 i32:$s))]>;
-def nvvm_move_i64 : BasicNVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
+def nvvm_move_i64 : BasicNVPTXInst<(outs B64:$r), (ins B64:$s),
"mov.b64",
[(set i64:$r,
(int_nvvm_move_i64 i64:$s))]>;
-def nvvm_move_float : BasicNVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
+def nvvm_move_float : BasicNVPTXInst<(outs B32:$r), (ins B32:$s),
"mov.f32",
[(set f32:$r,
(int_nvvm_move_float f32:$s))]>;
-def nvvm_move_double : BasicNVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
+def nvvm_move_double : BasicNVPTXInst<(outs B64:$r), (ins B64:$s),
"mov.f64",
[(set f64:$r,
(int_nvvm_move_double f64:$s))]>;
-def nvvm_move_ptr32 : BasicNVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
+def nvvm_move_ptr32 : BasicNVPTXInst<(outs B32:$r), (ins B32:$s),
"mov.u32",
[(set i32:$r,
(int_nvvm_move_ptr i32:$s))]>;
-def nvvm_move_ptr64 : BasicNVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
+def nvvm_move_ptr64 : BasicNVPTXInst<(outs B64:$r), (ins B64:$s),
"mov.u64",
[(set i64:$r,
(int_nvvm_move_ptr i64:$s))]>;
// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
-/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins ADDR_base:$s),
+/*def nvvm_move_sym32 : NVPTXInst<(outs B32:$r), (ins ADDR_base:$s),
"mov.u32 \t$r, $s;",
- [(set Int32Regs:$r,
+ [(set B32:$r,
(int_nvvm_move_ptr texternalsym:$s))]>;
-def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins ADDR_base:$s),
+def nvvm_move_sym64 : NVPTXInst<(outs B64:$r), (ins ADDR_base:$s),
"mov.u64 \t$r, $s;",
- [(set Int64Regs:$r,
+ [(set B64:$r,
(int_nvvm_move_ptr texternalsym:$s))]>;*/
def texsurf_handles
- : BasicNVPTXInst<(outs Int64Regs:$result), (ins ADDR_base:$src), "mov.u64">;
+ : BasicNVPTXInst<(outs B64:$result), (ins ADDR_base:$src), "mov.u64">;
//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------
-def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins B32:$a),
"// llvm.nvvm.compiler.warn()",
[(int_nvvm_compiler_warn i32:$a)]>;
-def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins B64:$a),
"// llvm.nvvm.compiler.warn()",
[(int_nvvm_compiler_warn i64:$a)]>;
-def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
+def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins B32:$a),
"// llvm.nvvm.compiler.error()",
[(int_nvvm_compiler_error i32:$a)]>;
-def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
+def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins B64:$a),
"// llvm.nvvm.compiler.error()",
[(int_nvvm_compiler_error i64:$a)]>;
@@ -2318,11 +2247,11 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
// isspacep
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
- def _32: BasicNVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ def _32: BasicNVPTXInst<(outs B1:$d), (ins B32:$a),
"isspacep." # suffix,
[(set i1:$d, (Intr i32:$a))]>,
Requires<Preds>;
- def _64: BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ def _64: BasicNVPTXInst<(outs B1:$d), (ins B64:$a),
"isspacep." # suffix,
[(set i1:$d, (Intr i64:$a))]>,
Requires<Preds>;
@@ -2337,7 +2266,7 @@ defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
[hasPTX<78>, hasSM<90>]>;
// Special register reads
-def MOV_SPECIAL : BasicNVPTXInst<(outs Int32Regs:$d),
+def MOV_SPECIAL : BasicNVPTXInst<(outs B32:$d),
(ins SpecialRegs:$r),
"mov.b32", []>;
@@ -2385,757 +2314,514 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
-class TEX_1D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x)),
+class TEX_1D_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
pattern>;
-multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
- Intrinsic intr> {
- def _RR : TEX_1D_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x))]>;
- def _RI : TEX_1D_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_1D_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_1D_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
-}
-
-defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_1d_v4f32_s32>;
-defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_1d_v4f32_f32>;
-defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_1d_v4s32_s32>;
-defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_v4s32_f32>;
-defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_1d_v4u32_s32>;
-defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_v4u32_f32>;
-
-class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$lod)),
+multiclass TEX_1D<string inst, Intrinsic intr> {
+ def _RR : TEX_1D_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x))]>;
+ def _RI : TEX_1D_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_1D_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_1D_base<inst, (ins i64imm:$t, i64imm:$s)>;
+}
+
+defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", int_nvvm_tex_1d_v4f32_s32>;
+defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", int_nvvm_tex_1d_v4f32_f32>;
+defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", int_nvvm_tex_1d_v4s32_s32>;
+defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", int_nvvm_tex_1d_v4s32_f32>;
+defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", int_nvvm_tex_1d_v4u32_s32>;
+defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", int_nvvm_tex_1d_v4u32_f32>;
+
+class TEX_1D_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
pattern>;
-multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_1D_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$lod))]>;
- def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_1D_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_1D_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$lod))]>;
+ def _RI : TEX_1D_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_1D_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_1D_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_1D_F32_F32_LEVEL :
- TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_1d_level_v4f32_f32>;
+ TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", int_nvvm_tex_1d_level_v4f32_f32>;
defm TEX_1D_S32_F32_LEVEL :
- TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_level_v4s32_f32>;
+ TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", int_nvvm_tex_1d_level_v4s32_f32>;
defm TEX_1D_U32_F32_LEVEL :
- TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_level_v4u32_f32>;
-
-class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
+ TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", int_nvvm_tex_1d_level_v4u32_f32>;
+
+class TEX_1D_GRAD_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$gradx, B32:$grady)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
" \\{$gradx\\}, \\{$grady\\};",
pattern>;
-multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_1D_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$gradx, intype:$grady))]>;
- def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_1D_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_1D_GRAD<string inst, Intrinsic intr> {
+ def _RR : TEX_1D_GRAD_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$gradx, B32:$grady))]>;
+ def _RI : TEX_1D_GRAD_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_1D_GRAD_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_1D_GRAD_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_1D_F32_F32_GRAD
- : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_1d_grad_v4f32_f32>;
+ : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", int_nvvm_tex_1d_grad_v4f32_f32>;
defm TEX_1D_S32_F32_GRAD
- : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_grad_v4s32_f32>;
+ : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", int_nvvm_tex_1d_grad_v4s32_f32>;
defm TEX_1D_U32_F32_GRAD
- : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_grad_v4u32_f32>;
-
-class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x)),
+ : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", int_nvvm_tex_1d_grad_v4u32_f32>;
+
+class TEX_1D_ARRAY_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
pattern>;
-multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_1D_ARRAY_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x))]>;
- def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_1D_ARRAY<string inst, Intrinsic intr> {
+ def _RR : TEX_1D_ARRAY_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x))]>;
+ def _RI : TEX_1D_ARRAY_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_1D_ARRAY_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_1D_ARRAY_F32_F32
- : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_1d_array_v4f32_f32>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", int_nvvm_tex_1d_array_v4f32_f32>;
defm TEX_1D_ARRAY_F32_S32
- : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_1d_array_v4f32_s32>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", int_nvvm_tex_1d_array_v4f32_s32>;
defm TEX_1D_ARRAY_S32_S32
- : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_1d_array_v4s32_s32>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", int_nvvm_tex_1d_array_v4s32_s32>;
defm TEX_1D_ARRAY_S32_F32
- : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_array_v4s32_f32>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", int_nvvm_tex_1d_array_v4s32_f32>;
defm TEX_1D_ARRAY_U32_S32
- : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_1d_array_v4u32_s32>;
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", int_nvvm_tex_1d_array_v4u32_s32>;
defm TEX_1D_ARRAY_U32_F32
- : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_array_v4u32_f32>;
-
-class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", int_nvvm_tex_1d_array_v4u32_f32>;
+
+class TEX_1D_ARRAY_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x\\}], $lod;",
pattern>;
-multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_1D_ARRAY_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$lod))]>;
- def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_1D_ARRAY_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_1D_ARRAY_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x, B32:$lod))]>;
+ def _RI : TEX_1D_ARRAY_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_1D_ARRAY_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_1D_ARRAY_F32_F32_LEVEL
- : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_1d_array_level_v4f32_f32>;
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", int_nvvm_tex_1d_array_level_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_LEVEL
- : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_array_level_v4s32_f32>;
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", int_nvvm_tex_1d_array_level_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_LEVEL
- : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_array_level_v4u32_f32>;
-
-class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x,
- intype:$gradx, intype:$grady)),
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", int_nvvm_tex_1d_array_level_v4u32_f32>;
+
+class TEX_1D_ARRAY_GRAD_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$gradx, B32:$grady)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
" \\{$gradx\\}, \\{$grady\\};",
pattern>;
-multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_1D_ARRAY_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x,
- intype:$gradx, intype:$grady))]>;
- def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_1D_ARRAY_GRAD<string inst, Intrinsic intr> {
+ def _RR : TEX_1D_ARRAY_GRAD_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x,
+ B32:$gradx, B32:$grady))]>;
+ def _RI : TEX_1D_ARRAY_GRAD_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_GRAD_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_1D_ARRAY_GRAD_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_1D_ARRAY_F32_F32_GRAD
- : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_1d_array_grad_v4f32_f32>;
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", int_nvvm_tex_1d_array_grad_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_GRAD
- : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_array_grad_v4s32_f32>;
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", int_nvvm_tex_1d_array_grad_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_GRAD
- : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_1d_array_grad_v4u32_f32>;
-
-class TEX_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y)),
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", int_nvvm_tex_1d_array_grad_v4u32_f32>;
+
+class TEX_2D_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
pattern>;
-multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
- Intrinsic intr> {
- def _RR : TEX_2D_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
- def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
-}
-
-defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_2d_v4f32_f32>;
-defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_2d_v4f32_s32>;
-defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_2d_v4s32_s32>;
-defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_v4s32_f32>;
-defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_2d_v4u32_s32>;
-defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_v4u32_f32>;
-
-class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
+multiclass TEX_2D<string inst, Intrinsic intr> {
+ def _RR : TEX_2D_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y))]>;
+ def _RI : TEX_2D_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_2D_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_2D_base<inst, (ins i64imm:$t, i64imm:$s)>;
+}
+
+defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", int_nvvm_tex_2d_v4f32_f32>;
+defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", int_nvvm_tex_2d_v4f32_s32>;
+defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", int_nvvm_tex_2d_v4s32_s32>;
+defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", int_nvvm_tex_2d_v4s32_f32>;
+defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", int_nvvm_tex_2d_v4u32_s32>;
+defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", int_nvvm_tex_2d_v4u32_f32>;
+
+class TEX_2D_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y\\}], $lod;",
pattern>;
-multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_2D_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$lod))]>;
- def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_2D_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_2D_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y, B32:$lod))]>;
+ def _RI : TEX_2D_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_2D_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_2D_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_2D_F32_F32_LEVEL :
- TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_2d_level_v4f32_f32>;
+ TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", int_nvvm_tex_2d_level_v4f32_f32>;
defm TEX_2D_S32_F32_LEVEL :
- TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_level_v4s32_f32>;
+ TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", int_nvvm_tex_2d_level_v4s32_f32>;
defm TEX_2D_U32_F32_LEVEL :
- TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_level_v4u32_f32>;
-
-class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1)),
+ TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", int_nvvm_tex_2d_level_v4u32_f32>;
+
+class TEX_2D_GRAD_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
pattern>;
-multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_2D_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1))]>;
- def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_2D_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_2D_GRAD<string inst, Intrinsic intr> {
+ def _RR : TEX_2D_GRAD_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1))]>;
+ def _RI : TEX_2D_GRAD_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_2D_GRAD_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_2D_GRAD_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_2D_F32_F32_GRAD :
- TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_2d_grad_v4f32_f32>;
+ TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", int_nvvm_tex_2d_grad_v4f32_f32>;
defm TEX_2D_S32_F32_GRAD :
- TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_grad_v4s32_f32>;
+ TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", int_nvvm_tex_2d_grad_v4s32_f32>;
defm TEX_2D_U32_F32_GRAD :
- TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_grad_v4u32_f32>;
-
-class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", int_nvvm_tex_2d_grad_v4u32_f32>;
+
+class TEX_2D_ARRAY_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$y)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $y\\}];",
pattern>;
-multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_2D_ARRAY_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y))]>;
- def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_2D_ARRAY<string inst, Intrinsic intr> {
+ def _RR : TEX_2D_ARRAY_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x, B32:$y))]>;
+ def _RI : TEX_2D_ARRAY_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_2D_ARRAY_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_2D_ARRAY_F32_F32
- : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_2d_array_v4f32_f32>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", int_nvvm_tex_2d_array_v4f32_f32>;
defm TEX_2D_ARRAY_F32_S32
- : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_2d_array_v4f32_s32>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", int_nvvm_tex_2d_array_v4f32_s32>;
defm TEX_2D_ARRAY_S32_S32
- : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_2d_array_v4s32_s32>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", int_nvvm_tex_2d_array_v4s32_s32>;
defm TEX_2D_ARRAY_S32_F32
- : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_array_v4s32_f32>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", int_nvvm_tex_2d_array_v4s32_f32>;
defm TEX_2D_ARRAY_U32_S32
- : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_2d_array_v4u32_s32>;
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", int_nvvm_tex_2d_array_v4u32_s32>;
defm TEX_2D_ARRAY_U32_F32
- : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_array_v4u32_f32>;
-
-class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
- intype:$lod)),
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", int_nvvm_tex_2d_array_v4u32_f32>;
+
+class TEX_2D_ARRAY_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$y, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
pattern>;
-multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_2D_ARRAY_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$lod))]>;
- def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_2D_ARRAY_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_2D_ARRAY_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x, B32:$y, B32:$lod))]>;
+ def _RI : TEX_2D_ARRAY_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_2D_ARRAY_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_2D_ARRAY_F32_F32_LEVEL
- : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_2d_array_level_v4f32_f32>;
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", int_nvvm_tex_2d_array_level_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_LEVEL
- : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_array_level_v4s32_f32>;
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", int_nvvm_tex_2d_array_level_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_LEVEL
- : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_array_level_v4u32_f32>;
-
-class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1)),
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", int_nvvm_tex_2d_array_level_v4u32_f32>;
+
+class TEX_2D_ARRAY_GRAD_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
pattern>;
-multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_2D_ARRAY_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1))]>;
- def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_2D_ARRAY_GRAD<string inst, Intrinsic intr> {
+ def _RR : TEX_2D_ARRAY_GRAD_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1))]>;
+ def _RI : TEX_2D_ARRAY_GRAD_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_GRAD_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_2D_ARRAY_GRAD_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_2D_ARRAY_F32_F32_GRAD
- : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_2d_array_grad_v4f32_f32>;
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", int_nvvm_tex_2d_array_grad_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_GRAD
- : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_array_grad_v4s32_f32>;
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", int_nvvm_tex_2d_array_grad_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_GRAD
- : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_2d_array_grad_v4u32_f32>;
-
-class TEX_3D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", int_nvvm_tex_2d_array_grad_v4u32_f32>;
+
+class TEX_3D_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y, B32:$z)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}];",
pattern>;
-multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
- Intrinsic intr> {
- def _RR : TEX_3D_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>;
- def _RI : TEX_3D_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_3D_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_3D_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
-}
-
-defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_3d_v4f32_f32>;
-defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_3d_v4f32_s32>;
-defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_3d_v4s32_s32>;
-defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_3d_v4s32_f32>;
-defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_3d_v4u32_s32>;
-defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_3d_v4u32_f32>;
-
-class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
- intype:$lod)),
+multiclass TEX_3D<string inst, Intrinsic intr> {
+ def _RR : TEX_3D_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y, B32:$z))]>;
+ def _RI : TEX_3D_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_3D_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_3D_base<inst, (ins i64imm:$t, i64imm:$s)>;
+}
+
+defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", int_nvvm_tex_3d_v4f32_f32>;
+defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", int_nvvm_tex_3d_v4f32_s32>;
+defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", int_nvvm_tex_3d_v4s32_s32>;
+defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", int_nvvm_tex_3d_v4s32_f32>;
+defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", int_nvvm_tex_3d_v4u32_s32>;
+defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", int_nvvm_tex_3d_v4u32_f32>;
+
+class TEX_3D_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y, B32:$z, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
pattern>;
-multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_3D_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
- intype:$lod))]>;
- def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_3D_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_3D_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y, B32:$z, B32:$lod))]>;
+ def _RI : TEX_3D_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_3D_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_3D_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_3D_F32_F32_LEVEL
- : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_3d_level_v4f32_f32>;
+ : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", int_nvvm_tex_3d_level_v4f32_f32>;
defm TEX_3D_S32_F32_LEVEL
- : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_3d_level_v4s32_f32>;
+ : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", int_nvvm_tex_3d_level_v4s32_f32>;
defm TEX_3D_U32_F32_LEVEL
- : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_3d_level_v4u32_f32>;
-
-class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
- intype :$gradx0, intype:$gradx1,
- intype:$gradx2, intype:$grady0,
- intype:$grady1, intype:$grady2)),
+ : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", int_nvvm_tex_3d_level_v4u32_f32>;
+
+class TEX_3D_GRAD_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1,
+ B32:$gradx2, B32:$grady0,
+ B32:$grady1, B32:$grady2)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
pattern>;
-multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_3D_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1, intype:$gradx2,
- intype:$grady0, intype:$grady1, intype:$grady2))]>;
- def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_3D_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_3D_GRAD<string inst, Intrinsic intr> {
+ def _RR : TEX_3D_GRAD_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1, B32:$gradx2,
+ B32:$grady0, B32:$grady1, B32:$grady2))]>;
+ def _RI : TEX_3D_GRAD_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_3D_GRAD_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_3D_GRAD_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_3D_F32_F32_GRAD
- : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_3d_grad_v4f32_f32>;
+ : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", int_nvvm_tex_3d_grad_v4f32_f32>;
defm TEX_3D_S32_F32_GRAD
- : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_3d_grad_v4s32_f32>;
+ : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", int_nvvm_tex_3d_grad_v4s32_f32>;
defm TEX_3D_U32_F32_GRAD
- : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_3d_grad_v4u32_f32>;
-
-class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", int_nvvm_tex_3d_grad_v4u32_f32>;
+
+class TEX_CUBE_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y, B32:$z)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}];",
pattern>;
-multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
- Intrinsic intr> {
- def _RR : TEX_CUBE_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>;
- def _RI : TEX_CUBE_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_CUBE_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_CUBE_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_CUBE<string inst, Intrinsic intr> {
+ def _RR : TEX_CUBE_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y, B32:$z))]>;
+ def _RI : TEX_CUBE_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_CUBE_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_CUBE_F32_F32
- : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_cube_v4f32_f32>;
+ : TEX_CUBE<"tex.cube.v4.f32.f32", int_nvvm_tex_cube_v4f32_f32>;
defm TEX_CUBE_S32_F32
- : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_cube_v4s32_f32>;
+ : TEX_CUBE<"tex.cube.v4.s32.f32", int_nvvm_tex_cube_v4s32_f32>;
defm TEX_CUBE_U32_F32
- : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_cube_v4u32_f32>;
-
-class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
- intype:$lod)),
+ : TEX_CUBE<"tex.cube.v4.u32.f32", int_nvvm_tex_cube_v4u32_f32>;
+
+class TEX_CUBE_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$x, B32:$y, B32:$z, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
pattern>;
-multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_CUBE_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
- intype:$lod))]>;
- def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_CUBE_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_CUBE_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y, B32:$z,
+ B32:$lod))]>;
+ def _RI : TEX_CUBE_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_CUBE_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_CUBE_F32_F32_LEVEL
- : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_cube_level_v4f32_f32>;
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", int_nvvm_tex_cube_level_v4f32_f32>;
defm TEX_CUBE_S32_F32_LEVEL
- : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_cube_level_v4s32_f32>;
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", int_nvvm_tex_cube_level_v4s32_f32>;
defm TEX_CUBE_U32_F32_LEVEL
- : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_cube_level_v4u32_f32>;
-
-class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
- intype:$z)),
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", int_nvvm_tex_cube_level_v4u32_f32>;
+
+class TEX_CUBE_ARRAY_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$y, B32:$z)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $z\\}];",
pattern>;
-multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_CUBE_ARRAY_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z))]>;
- def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_CUBE_ARRAY<string inst, Intrinsic intr> {
+ def _RR : TEX_CUBE_ARRAY_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x, B32:$y, B32:$z))]>;
+ def _RI : TEX_CUBE_ARRAY_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_CUBE_ARRAY_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_CUBE_ARRAY_F32_F32
- : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_cube_array_v4f32_f32>;
+ : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", int_nvvm_tex_cube_array_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32
- : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_cube_array_v4s32_f32>;
+ : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", int_nvvm_tex_cube_array_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32
- : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_cube_array_v4u32_f32>;
-
-class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
- intype:$z, intype:$lod)),
+ : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", int_nvvm_tex_cube_array_v4u32_f32>;
+
+class TEX_CUBE_ARRAY_LEVEL_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(texsamp, (ins B32:$l, B32:$x, B32:$y, B32:$z, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
pattern>;
-multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _RR : TEX_CUBE_ARRAY_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z,
- intype:$lod))]>;
- def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TEX_CUBE_ARRAY_LEVEL<string inst, Intrinsic intr> {
+ def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i64:$s, B32:$l, B32:$x, B32:$y, B32:$z,
+ B32:$lod))]>;
+ def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TEX_CUBE_ARRAY_F32_F32_LEVEL
: TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_cube_array_level_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL
: TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_cube_array_level_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL
: TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_cube_array_level_v4u32_f32>;
-class TLD4_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag texsamp, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$v0, outtype:$v1,
- outtype:$v2, outtype:$v3),
- !con(texsamp, (ins intype:$x, intype:$y)),
+class TLD4_2D_base<string inst, dag texsamp, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$v0, B32:$v1, B32:$v2, B32:$v3),
+ !con(texsamp, (ins B32:$x, B32:$y)),
inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
pattern>;
-multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
- Intrinsic intr> {
- def _RR : TLD4_2D_base<
- inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
- [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
- (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
- def _RI : TLD4_2D_base<inst, outtype, intype,
- (ins Int64Regs:$t, i64imm:$s)>;
- def _IR : TLD4_2D_base<inst, outtype, intype,
- (ins i64imm:$t, Int64Regs:$s)>;
- def _II : TLD4_2D_base<inst, outtype, intype,
- (ins i64imm:$t, i64imm:$s)>;
+multiclass TLD4_2D<string inst, Intrinsic intr> {
+ def _RR : TLD4_2D_base<inst, (ins B64:$t, B64:$s),
+ [(set B32:$v0, B32:$v1, B32:$v2, B32:$v3,
+ (intr i64:$t, i64:$s, B32:$x, B32:$y))]>;
+ def _RI : TLD4_2D_base<inst, (ins B64:$t, i64imm:$s)>;
+ def _IR : TLD4_2D_base<inst, (ins i64imm:$t, B64:$s)>;
+ def _II : TLD4_2D_base<inst, (ins i64imm:$t, i64imm:$s)>;
}
defm TLD4_R_2D_F32_F32
- : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_r_2d_v4f32_f32>;
+ : TLD4_2D<"tld4.r.2d.v4.f32.f32", int_nvvm_tld4_r_2d_v4f32_f32>;
defm TLD4_G_2D_F32_F32
- : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_g_2d_v4f32_f32>;
+ : TLD4_2D<"tld4.g.2d.v4.f32.f32", int_nvvm_tld4_g_2d_v4f32_f32>;
defm TLD4_B_2D_F32_F32
- : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_b_2d_v4f32_f32>;
+ : TLD4_2D<"tld4.b.2d.v4.f32.f32", int_nvvm_tld4_b_2d_v4f32_f32>;
defm TLD4_A_2D_F32_F32
- : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_a_2d_v4f32_f32>;
+ : TLD4_2D<"tld4.a.2d.v4.f32.f32", int_nvvm_tld4_a_2d_v4f32_f32>;
defm TLD4_R_2D_S32_F32
- : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_r_2d_v4s32_f32>;
+ : TLD4_2D<"tld4.r.2d.v4.s32.f32", int_nvvm_tld4_r_2d_v4s32_f32>;
defm TLD4_G_2D_S32_F32
- : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_g_2d_v4s32_f32>;
+ : TLD4_2D<"tld4.g.2d.v4.s32.f32", int_nvvm_tld4_g_2d_v4s32_f32>;
defm TLD4_B_2D_S32_F32
- : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_b_2d_v4s32_f32>;
+ : TLD4_2D<"tld4.b.2d.v4.s32.f32", int_nvvm_tld4_b_2d_v4s32_f32>;
defm TLD4_A_2D_S32_F32
- : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_a_2d_v4s32_f32>;
+ : TLD4_2D<"tld4.a.2d.v4.s32.f32", int_nvvm_tld4_a_2d_v4s32_f32>;
defm TLD4_R_2D_U32_F32
- : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_r_2d_v4u32_f32>;
+ : TLD4_2D<"tld4.r.2d.v4.u32.f32", int_nvvm_tld4_r_2d_v4u32_f32>;
defm TLD4_G_2D_U32_F32
- : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_g_2d_v4u32_f32>;
+ : TLD4_2D<"tld4.g.2d.v4.u32.f32", int_nvvm_tld4_g_2d_v4u32_f32>;
defm TLD4_B_2D_U32_F32
- : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_b_2d_v4u32_f32>;
+ : TLD4_2D<"tld4.b.2d.v4.u32.f32", int_nvvm_tld4_b_2d_v4u32_f32>;
defm TLD4_A_2D_U32_F32
- : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_a_2d_v4u32_f32>;
+ : TLD4_2D<"tld4.a.2d.v4.u32.f32", int_nvvm_tld4_a_2d_v4u32_f32>;
}
@@ -3144,754 +2830,542 @@ defm TLD4_A_2D_U32_F32
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles
-class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x)),
+class TEX_UNIFIED_1D_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
pattern>;
-multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_1D_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x))]>;
- def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_1D<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a, (intr i64:$t, B32:$x))]>;
+ def _I : TEX_UNIFIED_1D_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_F32_S32
- : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_unified_1d_v4f32_s32>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", int_nvvm_tex_unified_1d_v4f32_s32>;
defm TEX_UNIFIED_1D_F32_F32
- : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_v4f32_f32>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", int_nvvm_tex_unified_1d_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_S32
- : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_1d_v4s32_s32>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", int_nvvm_tex_unified_1d_v4s32_s32>;
defm TEX_UNIFIED_1D_S32_F32
- : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_v4s32_f32>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", int_nvvm_tex_unified_1d_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_S32
- : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_1d_v4u32_s32>;
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", int_nvvm_tex_unified_1d_v4u32_s32>;
defm TEX_UNIFIED_1D_U32_F32
- : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_v4u32_f32>;
-
-class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$lod)),
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", int_nvvm_tex_unified_1d_v4u32_f32>;
+
+class TEX_UNIFIED_1D_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_1D_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$lod))]>;
- def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_1D_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$lod))]>;
+ def _I : TEX_UNIFIED_1D_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_F32_F32_LEVEL
- : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_level_v4f32_f32>;
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", int_nvvm_tex_unified_1d_level_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL
- : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_level_v4s32_f32>;
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", int_nvvm_tex_unified_1d_level_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL
- : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_level_v4u32_f32>;
-
-class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", int_nvvm_tex_unified_1d_level_v4u32_f32>;
+
+class TEX_UNIFIED_1D_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$gradx, B32:$grady)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
pattern>;
-multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_1D_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$gradx, intype:$grady))]>;
- def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_1D_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$gradx, B32:$grady))]>;
+ def _I : TEX_UNIFIED_1D_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_F32_F32_GRAD
- : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_grad_v4f32_f32>;
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", int_nvvm_tex_unified_1d_grad_v4f32_f32>;
defm TEX_UNIFIED_1D_S32_F32_GRAD
- : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_grad_v4s32_f32>;
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", int_nvvm_tex_unified_1d_grad_v4s32_f32>;
defm TEX_UNIFIED_1D_U32_F32_GRAD
- : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_grad_v4u32_f32>;
-
-class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x)),
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", int_nvvm_tex_unified_1d_grad_v4u32_f32>;
+
+class TEX_UNIFIED_1D_ARRAY_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
pattern>;
-multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_1D_ARRAY_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x))]>;
- def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_1D_ARRAY<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_ARRAY_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a, (intr i64:$t, B32:$l, B32:$x))]>;
+ def _I : TEX_UNIFIED_1D_ARRAY_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_ARRAY_F32_S32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_unified_1d_array_v4f32_s32>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", int_nvvm_tex_unified_1d_array_v4f32_s32>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_array_v4f32_f32>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", int_nvvm_tex_unified_1d_array_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_1d_array_v4s32_s32>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", int_nvvm_tex_unified_1d_array_v4s32_s32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_array_v4s32_f32>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", int_nvvm_tex_unified_1d_array_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_1d_array_v4u32_s32>;
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", int_nvvm_tex_unified_1d_array_v4u32_s32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32
- : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_1d_array_v4u32_f32>;
-
-class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", int_nvvm_tex_unified_1d_array_v4u32_f32>;
+
+class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$lod))]>;
- def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$l, B32:$x, B32:$lod))]>;
+ def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
: TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_1d_array_level_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
: TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_1d_array_level_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_1d_array_level_v4u32_f32>;
-class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x,
- intype:$gradx, intype:$grady)),
+class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$gradx, B32:$grady)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
pattern>;
-multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$gradx, intype:$grady))]>;
- def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$l, B32:$x, B32:$gradx, B32:$grady))]>;
+ def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
: TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_1d_array_grad_v4f32_f32>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
: TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_1d_array_grad_v4s32_f32>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
: TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_1d_array_grad_v4u32_f32>;
-class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y)),
+class TEX_UNIFIED_2D_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
pattern>;
-multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_2D_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y))]>;
- def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_2D<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y))]>;
+ def _I : TEX_UNIFIED_2D_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_F32_S32
- : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_unified_2d_v4f32_s32>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", int_nvvm_tex_unified_2d_v4f32_s32>;
defm TEX_UNIFIED_2D_F32_F32
- : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_v4f32_f32>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", int_nvvm_tex_unified_2d_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_S32
- : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_2d_v4s32_s32>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", int_nvvm_tex_unified_2d_v4s32_s32>;
defm TEX_UNIFIED_2D_S32_F32
- : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_v4s32_f32>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", int_nvvm_tex_unified_2d_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_S32
- : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_2d_v4u32_s32>;
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", int_nvvm_tex_unified_2d_v4u32_s32>;
defm TEX_UNIFIED_2D_U32_F32
- : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_v4u32_f32>;
-
-class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", int_nvvm_tex_unified_2d_v4u32_f32>;
+
+class TEX_UNIFIED_2D_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_2D_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$lod))]>;
- def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_2D_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$lod))]>;
+ def _I : TEX_UNIFIED_2D_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_F32_F32_LEVEL
- : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_level_v4f32_f32>;
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", int_nvvm_tex_unified_2d_level_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
- : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_level_v4s32_f32>;
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", int_nvvm_tex_unified_2d_level_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
- : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_level_v4u32_f32>;
-
-class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1)),
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", int_nvvm_tex_unified_2d_level_v4u32_f32>;
+
+class TEX_UNIFIED_2D_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
pattern>;
-multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_2D_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1))]>;
- def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_2D_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1))]>;
+ def _I : TEX_UNIFIED_2D_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_F32_F32_GRAD
- : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_grad_v4f32_f32>;
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", int_nvvm_tex_unified_2d_grad_v4f32_f32>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
- : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_grad_v4s32_f32>;
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", int_nvvm_tex_unified_2d_grad_v4s32_f32>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
- : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_grad_v4u32_f32>;
-
-class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", int_nvvm_tex_unified_2d_grad_v4u32_f32>;
+
+class TEX_UNIFIED_2D_ARRAY_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$y)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
pattern>;
-multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_2D_ARRAY_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$y))]>;
- def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_2D_ARRAY<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_ARRAY_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$l, B32:$x, B32:$y))]>;
+ def _I : TEX_UNIFIED_2D_ARRAY_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_ARRAY_F32_S32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_unified_2d_array_v4f32_s32>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", int_nvvm_tex_unified_2d_array_v4f32_s32>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_array_v4f32_f32>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", int_nvvm_tex_unified_2d_array_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_2d_array_v4s32_s32>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", int_nvvm_tex_unified_2d_array_v4s32_s32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_array_v4s32_f32>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", int_nvvm_tex_unified_2d_array_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_2d_array_v4u32_s32>;
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", int_nvvm_tex_unified_2d_array_v4u32_s32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
- : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_2d_array_v4u32_f32>;
-
-class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
- intype:$lod)),
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", int_nvvm_tex_unified_2d_array_v4u32_f32>;
+
+class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$l, $x, $y, $y\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$lod))]>;
- def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$l, B32:$x, B32:$y, B32:$lod))]>;
+ def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
: TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_2d_array_level_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
: TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_2d_array_level_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_2d_array_level_v4u32_f32>;
-class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1)),
+class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
" \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
pattern>;
-multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$y,
- intype:$gradx0, intype:$gradx1,
- intype:$grady0, intype:$grady1))]>;
- def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$l, B32:$x, B32:$y,
+ B32:$gradx0, B32:$gradx1,
+ B32:$grady0, B32:$grady1))]>;
+ def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
: TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_2d_array_grad_v4f32_f32>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
: TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_2d_array_grad_v4s32_f32>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
: TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_2d_array_grad_v4u32_f32>;
-class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex, list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+class TEX_UNIFIED_3D_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$z)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
pattern>;
-multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_3D_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$z))]>;
- def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_3D<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_3D_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$z))]>;
+ def _I : TEX_UNIFIED_3D_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_3D_F32_S32
- : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs,
- int_nvvm_tex_unified_3d_v4f32_s32>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", int_nvvm_tex_unified_3d_v4f32_s32>;
defm TEX_UNIFIED_3D_F32_F32
- : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_v4f32_f32>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", int_nvvm_tex_unified_3d_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_S32
- : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_3d_v4s32_s32>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", int_nvvm_tex_unified_3d_v4s32_s32>;
defm TEX_UNIFIED_3D_S32_F32
- : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_v4s32_f32>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", int_nvvm_tex_unified_3d_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_S32
- : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs,
- int_nvvm_tex_unified_3d_v4u32_s32>;
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", int_nvvm_tex_unified_3d_v4u32_s32>;
defm TEX_UNIFIED_3D_U32_F32
- : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_v4u32_f32>;
-
-class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", int_nvvm_tex_unified_3d_v4u32_f32>;
+
+class TEX_UNIFIED_3D_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$z, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$x, $y, $z, $z\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_3D_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
- def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_3D_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_3D_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$z, B32:$lod))]>;
+ def _I : TEX_UNIFIED_3D_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_3D_F32_F32_LEVEL
- : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_level_v4f32_f32>;
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", int_nvvm_tex_unified_3d_level_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL
- : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_level_v4s32_f32>;
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", int_nvvm_tex_unified_3d_level_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL
- : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_level_v4u32_f32>;
-
-class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1,
- intype:$gradx2, intype:$grady0,
- intype:$grady1, intype:$grady2)),
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", int_nvvm_tex_unified_3d_level_v4u32_f32>;
+
+class TEX_UNIFIED_3D_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1,
+ B32:$gradx2, B32:$grady0,
+ B32:$grady1, B32:$grady2)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
pattern>;
-multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_3D_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1, intype:$gradx2,
- intype:$grady0, intype:$grady1, intype:$grady2))]>;
- def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_3D_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_3D_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1, B32:$gradx2,
+ B32:$grady0, B32:$grady1, B32:$grady2))]>;
+ def _I : TEX_UNIFIED_3D_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_3D_F32_F32_GRAD
- : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_grad_v4f32_f32>;
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", int_nvvm_tex_unified_3d_grad_v4f32_f32>;
defm TEX_UNIFIED_3D_S32_F32_GRAD
- : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_grad_v4s32_f32>;
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", int_nvvm_tex_unified_3d_grad_v4s32_f32>;
defm TEX_UNIFIED_3D_U32_F32_GRAD
- : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_3d_grad_v4u32_f32>;
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", int_nvvm_tex_unified_3d_grad_v4u32_f32>;
-class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+class TEX_UNIFIED_CUBE_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$z)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
pattern>;
-multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_CUBE_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$z))]>;
- def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_CUBE<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$z))]>;
+ def _I : TEX_UNIFIED_CUBE_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_F32_F32
- : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_v4f32_f32>;
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", int_nvvm_tex_unified_cube_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32
- : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_v4s32_f32>;
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", int_nvvm_tex_unified_cube_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32
- : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_v4u32_f32>;
-
-class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", int_nvvm_tex_unified_cube_v4u32_f32>;
+
+class TEX_UNIFIED_CUBE_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$z, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$x, $y, $z, $z\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_CUBE_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
- def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$z, B32:$lod))]>;
+ def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
: TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_cube_level_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
: TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_cube_level_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
: TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_cube_level_v4u32_f32>;
-class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
+class TEX_UNIFIED_CUBE_ARRAY_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$z)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
pattern>;
-multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_CUBE_ARRAY_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z))]>;
- def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i32:$l, B32:$x, B32:$y, B32:$z))]>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
- : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_array_v4f32_f32>;
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", int_nvvm_tex_unified_cube_array_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
- : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_array_v4s32_f32>;
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", int_nvvm_tex_unified_cube_array_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
- : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_array_v4u32_f32>;
-
-class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
- intype:$lod)),
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", int_nvvm_tex_unified_cube_array_v4u32_f32>;
+
+class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$z, B32:$lod)),
inst # " \t\\{$r, $g, $b, $a\\},"
" [$t, \\{$l, $x, $y, $z\\}], $lod;",
pattern>;
-multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, intype:$lod))]>;
- def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i32:$l, B32:$x, B32:$y, B32:$z, B32:$lod))]>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_level_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_level_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_level_v4u32_f32>;
-class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1,
- intype:$gradx2, intype:$grady0,
- intype:$grady1, intype:$grady2)),
+class TEX_UNIFIED_CUBE_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1,
+ B32:$gradx2, B32:$grady0,
+ B32:$grady1, B32:$grady2)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
pattern>;
-multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_CUBE_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1, intype:$gradx2,
- intype:$grady0, intype:$grady1, intype:$grady2))]>;
- def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_CUBE_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1, B32:$gradx2,
+ B32:$grady0, B32:$grady1, B32:$grady2))]>;
+ def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_F32_F32_GRAD
- : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_grad_v4f32_f32>;
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", int_nvvm_tex_unified_cube_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_GRAD
- : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_grad_v4s32_f32>;
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", int_nvvm_tex_unified_cube_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_GRAD
- : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tex_unified_cube_grad_v4u32_f32>;
-
-class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$r, outtype:$g,
- outtype:$b, outtype:$a),
- !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1,
- intype:$gradx2, intype:$grady0,
- intype:$grady1, intype:$grady2)),
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", int_nvvm_tex_unified_cube_grad_v4u32_f32>;
+
+class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$r, B32:$g, B32:$b, B32:$a),
+ !con(tex, (ins B32:$l, B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1,
+ B32:$gradx2, B32:$grady0,
+ B32:$grady1, B32:$grady2)),
inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
" \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
" \\{$grady0, $grady1, $grady2, $grady2\\};",
pattern>;
-multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
- (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z,
- intype:$gradx0, intype:$gradx1,
- intype:$gradx2, intype:$grady0,
- intype:$grady1, intype:$grady2))]>;
- def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
- (ins i64imm:$t)>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, Intrinsic intr> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, (ins B64:$t),
+ [(set B32:$r, B32:$g, B32:$b, B32:$a,
+ (intr i64:$t, i32:$l, B32:$x, B32:$y, B32:$z,
+ B32:$gradx0, B32:$gradx1, B32:$gradx2,
+ B32:$grady0, B32:$grady1, B32:$grady2))]>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, (ins i64imm:$t)>;
}
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
- Float32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
: TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
- Int32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_grad_v4u32_f32>;
-class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, dag tex,
- list<dag> pattern = []>
- : NVPTXInst<(outs outtype:$v0, outtype:$v1,
- outtype:$v2, outtype:$v3),
- !con(tex, (ins intype:$x, intype:$y)),
+class TLD4_UNIFIED_2D_base<string inst, dag tex, list<dag> pattern = []>
+ : NVPTXInst<(outs B32:$v0, B32:$v1, B32:$v2, B32:$v3),
+ !con(tex, (ins B32:$x, B32:$y)),
inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
pattern>;
-multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
- NVPTXRegClass intype, Intrinsic intr> {
- def _R : TLD4_UNIFIED_2D_base<
- inst, outtype, intype, (ins Int64Regs:$t),
- [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
- (intr i64:$t, intype:$x, intype:$y))]>;
- def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
+multiclass TLD4_UNIFIED_2D<string inst, Intrinsic intr> {
+ def _R : TLD4_UNIFIED_2D_base<inst, (ins B64:$t),
+ [(set B32:$v0, B32:$v1, B32:$v2, B32:$v3,
+ (intr i64:$t, B32:$x, B32:$y))]>;
+ def _I : TLD4_UNIFIED_2D_base<inst, (ins i64imm:$t)>;
}
defm TLD4_UNIFIED_R_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_unified_r_2d_v4f32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", int_nvvm_tld4_unified_r_2d_v4f32_f32>;
defm TLD4_UNIFIED_G_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_unified_g_2d_v4f32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", int_nvvm_tld4_unified_g_2d_v4f32_f32>;
defm TLD4_UNIFIED_B_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_unified_b_2d_v4f32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", int_nvvm_tld4_unified_b_2d_v4f32_f32>;
defm TLD4_UNIFIED_A_2D_F32_F32
- : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
- int_nvvm_tld4_unified_a_2d_v4f32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", int_nvvm_tld4_unified_a_2d_v4f32_f32>;
defm TLD4_UNIFIED_R_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_r_2d_v4s32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", int_nvvm_tld4_unified_r_2d_v4s32_f32>;
defm TLD4_UNIFIED_G_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_g_2d_v4s32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", int_nvvm_tld4_unified_g_2d_v4s32_f32>;
defm TLD4_UNIFIED_B_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_b_2d_v4s32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", int_nvvm_tld4_unified_b_2d_v4s32_f32>;
defm TLD4_UNIFIED_A_2D_S32_F32
- : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_a_2d_v4s32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", int_nvvm_tld4_unified_a_2d_v4s32_f32>;
defm TLD4_UNIFIED_R_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_r_2d_v4u32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", int_nvvm_tld4_unified_r_2d_v4u32_f32>;
defm TLD4_UNIFIED_G_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_g_2d_v4u32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", int_nvvm_tld4_unified_g_2d_v4u32_f32>;
defm TLD4_UNIFIED_B_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_b_2d_v4u32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", int_nvvm_tld4_unified_b_2d_v4u32_f32>;
defm TLD4_UNIFIED_A_2D_U32_F32
- : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
- int_nvvm_tld4_unified_a_2d_v4u32_f32>;
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", int_nvvm_tld4_unified_a_2d_v4u32_f32>;
}
-
//=== Surface load instructions
let IsSuld = true in {
@@ -3899,162 +3373,150 @@ let IsSuld = true in {
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
- !con(surf, (ins Int32Regs:$x)),
+ !con(surf, (ins B32:$x)),
inst # " \\{$r\\}, [$s, \\{$x\\}];",
pattern>;
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_1D_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, (intr i64:$s, i32:$x))]>;
def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
-defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
-defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
-defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
+defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", B16>;
+defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", B16>;
+defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", B32>;
+defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", B64>;
-defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
-defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
-defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
-defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
+defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", B16>;
+defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", B16>;
+defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", B32>;
+defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", B64>;
-defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
-defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
-defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
-defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
+defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", B16>;
+defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", B16>;
+defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", B32>;
+defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", B64>;
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
- !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ !con(surf, (ins B32:$l, B32:$x)),
inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
pattern>;
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_1D_ARRAY_base<inst, outtype, (ins B64:$s),
[(set outtype:$r,
(intr i64:$s, i32:$l, i32:$x))]>;
def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_1D_ARRAY_I8_CLAMP
- : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
-defm SULD_1D_ARRAY_I16_CLAMP
- : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
-defm SULD_1D_ARRAY_I32_CLAMP
- : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
-defm SULD_1D_ARRAY_I64_CLAMP
- : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
-
-defm SULD_1D_ARRAY_I8_TRAP
- : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
-defm SULD_1D_ARRAY_I16_TRAP
- : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
-defm SULD_1D_ARRAY_I32_TRAP
- : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
-defm SULD_1D_ARRAY_I64_TRAP
- : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
-
-defm SULD_1D_ARRAY_I8_ZERO
- : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
-defm SULD_1D_ARRAY_I16_ZERO
- : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
-defm SULD_1D_ARRAY_I32_ZERO
- : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
-defm SULD_1D_ARRAY_I64_ZERO
- : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
+defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", B16>;
+defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", B16>;
+defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", B32>;
+defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", B64>;
+
+defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", B16>;
+defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", B16>;
+defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", B32>;
+defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", B64>;
+
+defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", B16>;
+defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", B16>;
+defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", B32>;
+defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", B64>;
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ !con(surf, (ins B32:$x, B32:$y)),
inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
pattern>;
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_2D_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, (intr i64:$s, i32:$x, i32:$y))]>;
def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
-defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
-defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
-defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
+defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", B16>;
+defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", B16>;
+defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", B32>;
+defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", B64>;
-defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
-defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
-defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
-defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
+defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", B16>;
+defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", B16>;
+defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", B32>;
+defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", B64>;
-defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
-defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
-defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
-defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
+defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", B16>;
+defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", B16>;
+defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", B32>;
+defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", B64>;
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
- !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ !con(surf, (ins B32:$l, B32:$x, B32:$y)),
inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
pattern>;
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_2D_ARRAY_base<inst, outtype, (ins B64:$s),
[(set outtype:$r,
(intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
-defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
-defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
-defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
+defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", B16>;
+defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", B16>;
+defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", B32>;
+defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", B64>;
-defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
-defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
-defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
-defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
+defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", B16>;
+defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", B16>;
+defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", B32>;
+defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", B64>;
-defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
-defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
-defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
-defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
+defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", B16>;
+defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", B16>;
+defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", B32>;
+defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", B64>;
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ !con(surf, (ins B32:$x, B32:$y, B32:$z)),
inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
pattern>;
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_3D_base<inst, outtype, (ins B64:$s),
[(set outtype:$r,
(intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
-defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
-defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
-defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
+defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", B16>;
+defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", B16>;
+defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", B32>;
+defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", B64>;
-defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
-defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
-defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
-defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
+defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", B16>;
+defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", B16>;
+defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", B32>;
+defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", B64>;
-defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
-defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
-defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
-defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
+defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", B16>;
+defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", B16>;
+defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", B32>;
+defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", B64>;
}
let IsSuld = 2 in {
@@ -4062,188 +3524,152 @@ let IsSuld = 2 in {
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
- !con(surf, (ins Int32Regs:$x)),
+ !con(surf, (ins B32:$x)),
inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
pattern>;
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_1D_V2_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g,
(intr i64:$s, i32:$x))]>;
def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
-defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
-defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
-defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
+defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", B16>;
+defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", B16>;
+defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", B32>;
+defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", B64>;
-defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
-defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
-defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
-defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
+defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", B16>;
+defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", B16>;
+defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", B32>;
+defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", B64>;
-defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
-defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
-defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
-defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
+defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", B16>;
+defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", B16>;
+defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", B32>;
+defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", B64>;
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
- !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ !con(surf, (ins B32:$l, B32:$x)),
inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
pattern>;
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g,
(intr i64:$s, i32:$l, i32:$x))]>;
def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_1D_ARRAY_V2I8_CLAMP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
-defm SULD_1D_ARRAY_V2I16_CLAMP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
-defm SULD_1D_ARRAY_V2I32_CLAMP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
-defm SULD_1D_ARRAY_V2I64_CLAMP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
-
-defm SULD_1D_ARRAY_V2I8_TRAP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
-defm SULD_1D_ARRAY_V2I16_TRAP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
-defm SULD_1D_ARRAY_V2I32_TRAP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
-defm SULD_1D_ARRAY_V2I64_TRAP
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
-
-defm SULD_1D_ARRAY_V2I8_ZERO
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
-defm SULD_1D_ARRAY_V2I16_ZERO
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
-defm SULD_1D_ARRAY_V2I32_ZERO
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
-defm SULD_1D_ARRAY_V2I64_ZERO
- : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
+defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", B16>;
+defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", B16>;
+defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", B32>;
+defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", B64>;
+
+defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", B16>;
+defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", B16>;
+defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", B32>;
+defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", B64>;
+
+defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", B16>;
+defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", B16>;
+defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", B32>;
+defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", B64>;
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ !con(surf, (ins B32:$x, B32:$y)),
inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
pattern>;
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_2D_V2_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g,
(intr i64:$s, i32:$x, i32:$y))]>;
def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_2D_V2I8_CLAMP
- : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
-defm SULD_2D_V2I16_CLAMP
- : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
-defm SULD_2D_V2I32_CLAMP
- : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
-defm SULD_2D_V2I64_CLAMP
- : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
-
-defm SULD_2D_V2I8_TRAP
- : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
-defm SULD_2D_V2I16_TRAP
- : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
-defm SULD_2D_V2I32_TRAP
- : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
-defm SULD_2D_V2I64_TRAP
- : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
-
-defm SULD_2D_V2I8_ZERO
- : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
-defm SULD_2D_V2I16_ZERO
- : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
-defm SULD_2D_V2I32_ZERO
- : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
-defm SULD_2D_V2I64_ZERO
- : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
+defm SULD_2D_V2I8_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", B16>;
+defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", B16>;
+defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", B32>;
+defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", B64>;
+
+defm SULD_2D_V2I8_TRAP : SULD_2D_V2<"suld.b.2d.v2.b8.trap", B16>;
+defm SULD_2D_V2I16_TRAP : SULD_2D_V2<"suld.b.2d.v2.b16.trap", B16>;
+defm SULD_2D_V2I32_TRAP : SULD_2D_V2<"suld.b.2d.v2.b32.trap", B32>;
+defm SULD_2D_V2I64_TRAP : SULD_2D_V2<"suld.b.2d.v2.b64.trap", B64>;
+
+defm SULD_2D_V2I8_ZERO : SULD_2D_V2<"suld.b.2d.v2.b8.zero", B16>;
+defm SULD_2D_V2I16_ZERO : SULD_2D_V2<"suld.b.2d.v2.b16.zero", B16>;
+defm SULD_2D_V2I32_ZERO : SULD_2D_V2<"suld.b.2d.v2.b32.zero", B32>;
+defm SULD_2D_V2I64_ZERO : SULD_2D_V2<"suld.b.2d.v2.b64.zero", B64>;
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
- !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ !con(surf, (ins B32:$l, B32:$x, B32:$y)),
inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
pattern>;
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g,
(intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_2D_ARRAY_V2I8_CLAMP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
-defm SULD_2D_ARRAY_V2I16_CLAMP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
-defm SULD_2D_ARRAY_V2I32_CLAMP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
-defm SULD_2D_ARRAY_V2I64_CLAMP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
-
-defm SULD_2D_ARRAY_V2I8_TRAP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
-defm SULD_2D_ARRAY_V2I16_TRAP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
-defm SULD_2D_ARRAY_V2I32_TRAP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
-defm SULD_2D_ARRAY_V2I64_TRAP
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
-
-defm SULD_2D_ARRAY_V2I8_ZERO
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
-defm SULD_2D_ARRAY_V2I16_ZERO
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
-defm SULD_2D_ARRAY_V2I32_ZERO
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
-defm SULD_2D_ARRAY_V2I64_ZERO
- : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
+defm SULD_2D_ARRAY_V2I8_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", B16>;
+defm SULD_2D_ARRAY_V2I16_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", B16>;
+defm SULD_2D_ARRAY_V2I32_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", B32>;
+defm SULD_2D_ARRAY_V2I64_CLAMP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", B64>;
+
+defm SULD_2D_ARRAY_V2I8_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", B16>;
+defm SULD_2D_ARRAY_V2I16_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", B16>;
+defm SULD_2D_ARRAY_V2I32_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", B32>;
+defm SULD_2D_ARRAY_V2I64_TRAP : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", B64>;
+
+defm SULD_2D_ARRAY_V2I8_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", B16>;
+defm SULD_2D_ARRAY_V2I16_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", B16>;
+defm SULD_2D_ARRAY_V2I32_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", B32>;
+defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", B64>;
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ !con(surf, (ins B32:$x, B32:$y, B32:$z)),
inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
pattern>;
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_3D_V2_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g,
(intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
-defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
-defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
-defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
+defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", B16>;
+defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", B16>;
+defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", B32>;
+defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", B64>;
-defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
-defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
-defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
-defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
+defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", B16>;
+defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", B16>;
+defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", B32>;
+defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", B64>;
-defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
-defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
-defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
-defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
+defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", B16>;
+defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", B16>;
+defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", B32>;
+defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", B64>;
}
@@ -4252,157 +3678,139 @@ let IsSuld = 3 in {
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
- !con(surf, (ins Int32Regs:$x)),
+ !con(surf, (ins B32:$x)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
pattern>;
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_1D_V4_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
(intr i64:$s, i32:$x))]>;
def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
-defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
-defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
+defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", B16>;
+defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", B16>;
+defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", B32>;
-defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
-defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
-defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
+defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", B16>;
+defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", B16>;
+defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", B32>;
-defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
-defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
-defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
+defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", B16>;
+defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", B16>;
+defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", B32>;
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
- !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ !con(surf, (ins B32:$l, B32:$x)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
pattern>;
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g, outtype:$b,
outtype:$a,
(intr i64:$s, i32:$l, i32:$x))]>;
def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_1D_ARRAY_V4I8_CLAMP
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
-defm SULD_1D_ARRAY_V4I16_CLAMP
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
-defm SULD_1D_ARRAY_V4I32_CLAMP
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_V4I8_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", B16>;
+defm SULD_1D_ARRAY_V4I16_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", B16>;
+defm SULD_1D_ARRAY_V4I32_CLAMP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", B32>;
-defm SULD_1D_ARRAY_V4I8_TRAP
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
-defm SULD_1D_ARRAY_V4I16_TRAP
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
-defm SULD_1D_ARRAY_V4I32_TRAP
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_V4I8_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", B16>;
+defm SULD_1D_ARRAY_V4I16_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", B16>;
+defm SULD_1D_ARRAY_V4I32_TRAP : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", B32>;
-defm SULD_1D_ARRAY_V4I8_ZERO
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
-defm SULD_1D_ARRAY_V4I16_ZERO
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
-defm SULD_1D_ARRAY_V4I32_ZERO
- : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_V4I8_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", B16>;
+defm SULD_1D_ARRAY_V4I16_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", B16>;
+defm SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", B32>;
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ !con(surf, (ins B32:$x, B32:$y)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
pattern>;
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_2D_V4_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
(intr i64:$s, i32:$x, i32:$y))]>;
def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
-defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
-defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
+defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", B16>;
+defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", B16>;
+defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", B32>;
-defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
-defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
-defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
+defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", B16>;
+defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", B16>;
+defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", B32>;
-defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
-defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
-defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
+defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", B16>;
+defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", B16>;
+defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", B32>;
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
- !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ !con(surf, (ins B32:$l, B32:$x, B32:$y)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
pattern>;
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g, outtype:$b,
outtype:$a,
(intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_2D_ARRAY_V4I8_CLAMP
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
-defm SULD_2D_ARRAY_V4I16_CLAMP
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
-defm SULD_2D_ARRAY_V4I32_CLAMP
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_V4I8_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", B16>;
+defm SULD_2D_ARRAY_V4I16_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", B16>;
+defm SULD_2D_ARRAY_V4I32_CLAMP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", B32>;
-defm SULD_2D_ARRAY_V4I8_TRAP
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
-defm SULD_2D_ARRAY_V4I16_TRAP
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
-defm SULD_2D_ARRAY_V4I32_TRAP
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_V4I8_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", B16>;
+defm SULD_2D_ARRAY_V4I16_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", B16>;
+defm SULD_2D_ARRAY_V4I32_TRAP : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", B32>;
-defm SULD_2D_ARRAY_V4I8_ZERO
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
-defm SULD_2D_ARRAY_V4I16_ZERO
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
-defm SULD_2D_ARRAY_V4I32_ZERO
- : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_V4I8_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", B16>;
+defm SULD_2D_ARRAY_V4I16_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", B16>;
+defm SULD_2D_ARRAY_V4I32_ZERO : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", B32>;
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
list<dag> pattern = []>
: NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ !con(surf, (ins B32:$x, B32:$y, B32:$z)),
inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
pattern>;
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s),
+ def _R : SULD_3D_V4_base<inst, outtype, (ins B64:$s),
[(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
(intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}
-defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
-defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
-defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
+defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", B16>;
+defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", B16>;
+defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", B32>;
-defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
-defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
-defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
+defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", B16>;
+defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", B16>;
+defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", B32>;
-defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
-defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
-defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
+defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", B16>;
+defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", B16>;
+defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", B32>;
}
@@ -4414,11 +3822,11 @@ let IsSurfTexQuery = true in {
foreach query = ["channel_order", "channel_data_type", "width", "height",
"depth", "array_size", "num_samples", "num_mipmap_levels"] in {
def TXQ_ # !toupper(query) # _R
- : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ : NVPTXInst<(outs B32:$d), (ins B64:$a),
"txq." # query # ".b32 \t$d, [$a];",
[(set i32:$d, (!cast<Intrinsic>("int_nvvm_txq_" # query) i64:$a))]>;
def TXQ_ # !toupper(query) # _I
- : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ : NVPTXInst<(outs B32:$d), (ins i64imm:$a),
"txq." # query # ".b32 \t$d, [$a];",
[]>;
}
@@ -4431,11 +3839,11 @@ let IsSurfTexQuery = true in {
let IsSurfTexQuery = true in {
foreach query = ["channel_order", "channel_data_type", "width", "height", "depth", "array_size"] in {
def SUQ_ # !toupper(query) # _R
- : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ : NVPTXInst<(outs B32:$d), (ins B64:$a),
"suq." # query # ".b32 \t$d, [$a];",
[(set i32:$d, (!cast<Intrinsic>("int_nvvm_suq_" # query) i64:$a))]>;
def SUQ_ # !toupper(query) # _I
- : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ : NVPTXInst<(outs B32:$d), (ins i64imm:$a),
"suq." # query # ".b32 \t$d, [$a];",
[]>;
}
@@ -4445,15 +3853,15 @@ let IsSurfTexQuery = true in {
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
def ISTYPEP_SAMPLER
- : BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ : BasicNVPTXInst<(outs B1:$d), (ins B64:$a),
"istypep.samplerref",
[(set i1:$d, (int_nvvm_istypep_sampler i64:$a))]>;
def ISTYPEP_SURFACE
- : BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ : BasicNVPTXInst<(outs B1:$d), (ins B64:$a),
"istypep.surfref",
[(set i1:$d, (int_nvvm_istypep_surface i64:$a))]>;
def ISTYPEP_TEXTURE
- : BasicNVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ : BasicNVPTXInst<(outs B1:$d), (ins B64:$a),
"istypep.texref",
[(set i1:$d, (int_nvvm_istypep_texture i64:$a))]>;
@@ -4463,561 +3871,489 @@ let IsSust = true in {
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, intype:$r)),
+ !con(surf, (ins B32:$x, intype:$r)),
inst # " \t[$s, \\{$x\\}], \\{$r\\};", pat>;
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, intype:$r)]>;
+ def _R : SUST_1D_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, intype:$r)]>;
def _I : SUST_1D_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_1D_I8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
-defm SUST_B_1D_I16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
-defm SUST_B_1D_I32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
-defm SUST_B_1D_I64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
+defm SUST_B_1D_I8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", B16>;
+defm SUST_B_1D_I16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", B16>;
+defm SUST_B_1D_I32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", B32>;
+defm SUST_B_1D_I64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", B64>;
-defm SUST_B_1D_I8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
-defm SUST_B_1D_I16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
-defm SUST_B_1D_I32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
-defm SUST_B_1D_I64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
+defm SUST_B_1D_I8_TRAP : SUST_1D<"sust.b.1d.b8.trap", B16>;
+defm SUST_B_1D_I16_TRAP : SUST_1D<"sust.b.1d.b16.trap", B16>;
+defm SUST_B_1D_I32_TRAP : SUST_1D<"sust.b.1d.b32.trap", B32>;
+defm SUST_B_1D_I64_TRAP : SUST_1D<"sust.b.1d.b64.trap", B64>;
-defm SUST_B_1D_I8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
-defm SUST_B_1D_I16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
-defm SUST_B_1D_I32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
-defm SUST_B_1D_I64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
+defm SUST_B_1D_I8_ZERO : SUST_1D<"sust.b.1d.b8.zero", B16>;
+defm SUST_B_1D_I16_ZERO : SUST_1D<"sust.b.1d.b16.zero", B16>;
+defm SUST_B_1D_I32_ZERO : SUST_1D<"sust.b.1d.b32.zero", B32>;
+defm SUST_B_1D_I64_ZERO : SUST_1D<"sust.b.1d.b64.zero", B64>;
-defm SUST_P_1D_I8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
-defm SUST_P_1D_I16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
-defm SUST_P_1D_I32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
+defm SUST_P_1D_I8_TRAP : SUST_1D<"sust.p.1d.b8.trap", B16>;
+defm SUST_P_1D_I16_TRAP : SUST_1D<"sust.p.1d.b16.trap", B16>;
+defm SUST_P_1D_I32_TRAP : SUST_1D<"sust.p.1d.b32.trap", B32>;
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
+ !con(surf, (ins B32:$x, intype:$r, intype:$g)),
inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
pat>;
multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, intype:$r, intype:$g)]>;
+ def _R : SUST_1D_V2_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, intype:$r, intype:$g)]>;
def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s), []>;
}
// int_nvvm_sust_b_1d_v2i8_clamp
-defm SUST_B_1D_V2I8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
-defm SUST_B_1D_V2I16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
-defm SUST_B_1D_V2I32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
-defm SUST_B_1D_V2I64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
+defm SUST_B_1D_V2I8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", B16>;
+defm SUST_B_1D_V2I16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", B16>;
+defm SUST_B_1D_V2I32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", B32>;
+defm SUST_B_1D_V2I64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", B64>;
-defm SUST_B_1D_V2I8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
-defm SUST_B_1D_V2I16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
-defm SUST_B_1D_V2I32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
-defm SUST_B_1D_V2I64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
+defm SUST_B_1D_V2I8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", B16>;
+defm SUST_B_1D_V2I16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", B16>;
+defm SUST_B_1D_V2I32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", B32>;
+defm SUST_B_1D_V2I64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", B64>;
-defm SUST_B_1D_V2I8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
-defm SUST_B_1D_V2I16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
-defm SUST_B_1D_V2I32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
-defm SUST_B_1D_V2I64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
+defm SUST_B_1D_V2I8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", B16>;
+defm SUST_B_1D_V2I16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", B16>;
+defm SUST_B_1D_V2I32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", B32>;
+defm SUST_B_1D_V2I64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", B64>;
-defm SUST_P_1D_V2I8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
-defm SUST_P_1D_V2I16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
-defm SUST_P_1D_V2I32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
+defm SUST_P_1D_V2I8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", B16>;
+defm SUST_P_1D_V2I16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", B16>;
+defm SUST_P_1D_V2I32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", B32>;
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
+ !con(surf, (ins B32:$x, intype:$r, intype:$g,
intype:$b, intype:$a)),
inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
pat>;
multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, intype:$r, intype:$g,
+ def _R : SUST_1D_V4_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, intype:$r, intype:$g,
intype:$b, intype:$a)]>;
def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_1D_V4I8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
-defm SUST_B_1D_V4I16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
-defm SUST_B_1D_V4I32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
+defm SUST_B_1D_V4I8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", B16>;
+defm SUST_B_1D_V4I16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", B16>;
+defm SUST_B_1D_V4I32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", B32>;
-defm SUST_B_1D_V4I8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
-defm SUST_B_1D_V4I16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
-defm SUST_B_1D_V4I32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
+defm SUST_B_1D_V4I8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", B16>;
+defm SUST_B_1D_V4I16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", B16>;
+defm SUST_B_1D_V4I32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", B32>;
-defm SUST_B_1D_V4I8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
-defm SUST_B_1D_V4I16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
-defm SUST_B_1D_V4I32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
+defm SUST_B_1D_V4I8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", B16>;
+defm SUST_B_1D_V4I16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", B16>;
+defm SUST_B_1D_V4I32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", B32>;
-defm SUST_P_1D_V4I8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
-defm SUST_P_1D_V4I16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
-defm SUST_P_1D_V4I32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
+defm SUST_P_1D_V4I8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", B16>;
+defm SUST_P_1D_V4I16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", B16>;
+defm SUST_P_1D_V4I32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", B32>;
class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
+ !con(surf, (ins B32:$idx, B32:$x, intype:$r)),
inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
pat>;
multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, intype:$r)]>;
+ def _R : SUST_1D_ARRAY_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$idx, B32:$x, intype:$r)]>;
def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_1D_ARRAY_I8_CLAMP
- : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
-defm SUST_B_1D_ARRAY_I16_CLAMP
- : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
-defm SUST_B_1D_ARRAY_I32_CLAMP
- : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
-defm SUST_B_1D_ARRAY_I64_CLAMP
- : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
-
-defm SUST_B_1D_ARRAY_I8_TRAP
- : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
-defm SUST_B_1D_ARRAY_I16_TRAP
- : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
-defm SUST_B_1D_ARRAY_I32_TRAP
- : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
-defm SUST_B_1D_ARRAY_I64_TRAP
- : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
-
-defm SUST_B_1D_ARRAY_I8_ZERO
- : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
-defm SUST_B_1D_ARRAY_I16_ZERO
- : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
-defm SUST_B_1D_ARRAY_I32_ZERO
- : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
-defm SUST_B_1D_ARRAY_I64_ZERO
- : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
-
-defm SUST_P_1D_ARRAY_I8_TRAP
- : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
-defm SUST_P_1D_ARRAY_I16_TRAP
- : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
-defm SUST_P_1D_ARRAY_I32_TRAP
- : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_I8_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", B16>;
+defm SUST_B_1D_ARRAY_I16_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", B16>;
+defm SUST_B_1D_ARRAY_I32_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", B32>;
+defm SUST_B_1D_ARRAY_I64_CLAMP : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", B64>;
+
+defm SUST_B_1D_ARRAY_I8_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", B16>;
+defm SUST_B_1D_ARRAY_I16_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", B16>;
+defm SUST_B_1D_ARRAY_I32_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", B32>;
+defm SUST_B_1D_ARRAY_I64_TRAP : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", B64>;
+
+defm SUST_B_1D_ARRAY_I8_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", B16>;
+defm SUST_B_1D_ARRAY_I16_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", B16>;
+defm SUST_B_1D_ARRAY_I32_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", B32>;
+defm SUST_B_1D_ARRAY_I64_ZERO : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", B64>;
+
+defm SUST_P_1D_ARRAY_I8_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", B16>;
+defm SUST_P_1D_ARRAY_I16_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", B16>;
+defm SUST_P_1D_ARRAY_I32_TRAP : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", B32>;
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ !con(surf, (ins B32:$idx, B32:$x,
intype:$r, intype:$g)),
inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
pat>;
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
+ def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$idx, B32:$x,
intype:$r, intype:$g)]>;
def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_1D_ARRAY_V2I8_CLAMP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
-defm SUST_B_1D_ARRAY_V2I16_CLAMP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
-defm SUST_B_1D_ARRAY_V2I32_CLAMP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
-defm SUST_B_1D_ARRAY_V2I64_CLAMP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
-
-defm SUST_B_1D_ARRAY_V2I8_TRAP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
-defm SUST_B_1D_ARRAY_V2I16_TRAP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
-defm SUST_B_1D_ARRAY_V2I32_TRAP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
-defm SUST_B_1D_ARRAY_V2I64_TRAP
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
-
-defm SUST_B_1D_ARRAY_V2I8_ZERO
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
-defm SUST_B_1D_ARRAY_V2I16_ZERO
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
-defm SUST_B_1D_ARRAY_V2I32_ZERO
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
-defm SUST_B_1D_ARRAY_V2I64_ZERO
- : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
-
-defm SUST_P_1D_ARRAY_V2I8_TRAP
- : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
-defm SUST_P_1D_ARRAY_V2I16_TRAP
- : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
-defm SUST_P_1D_ARRAY_V2I32_TRAP
- : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2I8_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", B16>;
+defm SUST_B_1D_ARRAY_V2I16_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", B16>;
+defm SUST_B_1D_ARRAY_V2I32_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", B32>;
+defm SUST_B_1D_ARRAY_V2I64_CLAMP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", B64>;
+
+defm SUST_B_1D_ARRAY_V2I8_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", B16>;
+defm SUST_B_1D_ARRAY_V2I16_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", B16>;
+defm SUST_B_1D_ARRAY_V2I32_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", B32>;
+defm SUST_B_1D_ARRAY_V2I64_TRAP : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", B64>;
+
+defm SUST_B_1D_ARRAY_V2I8_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", B16>;
+defm SUST_B_1D_ARRAY_V2I16_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", B16>;
+defm SUST_B_1D_ARRAY_V2I32_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", B32>;
+defm SUST_B_1D_ARRAY_V2I64_ZERO : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", B64>;
+
+defm SUST_P_1D_ARRAY_V2I8_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", B16>;
+defm SUST_P_1D_ARRAY_V2I16_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", B16>;
+defm SUST_P_1D_ARRAY_V2I32_TRAP : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", B32>;
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ !con(surf, (ins B32:$idx, B32:$x,
intype:$r, intype:$g, intype:$b, intype:$a)),
inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
pat>;
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
+ def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$idx, B32:$x,
intype:$r, intype:$g, intype:$b, intype:$a)]>;
def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s), []>;
}
defm SUST_B_1D_ARRAY_V4I8_CLAMP
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", B16>;
defm SUST_B_1D_ARRAY_V4I16_CLAMP
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", B16>;
defm SUST_B_1D_ARRAY_V4I32_CLAMP
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", B32>;
defm SUST_B_1D_ARRAY_V4I8_TRAP
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", B16>;
defm SUST_B_1D_ARRAY_V4I16_TRAP
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", B16>;
defm SUST_B_1D_ARRAY_V4I32_TRAP
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", B32>;
defm SUST_B_1D_ARRAY_V4I8_ZERO
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", B16>;
defm SUST_B_1D_ARRAY_V4I16_ZERO
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", B16>;
defm SUST_B_1D_ARRAY_V4I32_ZERO
- : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", B32>;
defm SUST_P_1D_ARRAY_V4I8_TRAP
- : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", B16>;
defm SUST_P_1D_ARRAY_V4I16_TRAP
- : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", B16>;
defm SUST_P_1D_ARRAY_V4I32_TRAP
- : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", B32>;
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
+ !con(surf, (ins B32:$x, B32:$y, intype:$r)),
inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
pat>;
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, intype:$r)]>;
+ def _R : SUST_2D_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, B32:$y, intype:$r)]>;
def _I : SUST_2D_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_2D_I8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
-defm SUST_B_2D_I16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
-defm SUST_B_2D_I32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
-defm SUST_B_2D_I64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
+defm SUST_B_2D_I8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", B16>;
+defm SUST_B_2D_I16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", B16>;
+defm SUST_B_2D_I32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", B32>;
+defm SUST_B_2D_I64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", B64>;
-defm SUST_B_2D_I8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
-defm SUST_B_2D_I16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
-defm SUST_B_2D_I32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
-defm SUST_B_2D_I64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
+defm SUST_B_2D_I8_TRAP : SUST_2D<"sust.b.2d.b8.trap", B16>;
+defm SUST_B_2D_I16_TRAP : SUST_2D<"sust.b.2d.b16.trap", B16>;
+defm SUST_B_2D_I32_TRAP : SUST_2D<"sust.b.2d.b32.trap", B32>;
+defm SUST_B_2D_I64_TRAP : SUST_2D<"sust.b.2d.b64.trap", B64>;
-defm SUST_B_2D_I8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
-defm SUST_B_2D_I16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
-defm SUST_B_2D_I32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
-defm SUST_B_2D_I64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
+defm SUST_B_2D_I8_ZERO : SUST_2D<"sust.b.2d.b8.zero", B16>;
+defm SUST_B_2D_I16_ZERO : SUST_2D<"sust.b.2d.b16.zero", B16>;
+defm SUST_B_2D_I32_ZERO : SUST_2D<"sust.b.2d.b32.zero", B32>;
+defm SUST_B_2D_I64_ZERO : SUST_2D<"sust.b.2d.b64.zero", B64>;
-defm SUST_P_2D_I8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
-defm SUST_P_2D_I16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
-defm SUST_P_2D_I32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
+defm SUST_P_2D_I8_TRAP : SUST_2D<"sust.p.2d.b8.trap", B16>;
+defm SUST_P_2D_I16_TRAP : SUST_2D<"sust.p.2d.b16.trap", B16>;
+defm SUST_P_2D_I32_TRAP : SUST_2D<"sust.p.2d.b32.trap", B32>;
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ !con(surf, (ins B32:$x, B32:$y,
intype:$r, intype:$g)),
inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
pat>;
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ def _R : SUST_2D_V2_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, B32:$y,
intype:$r, intype:$g)]>;
def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_2D_V2I8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
-defm SUST_B_2D_V2I16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
-defm SUST_B_2D_V2I32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
-defm SUST_B_2D_V2I64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
+defm SUST_B_2D_V2I8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", B16>;
+defm SUST_B_2D_V2I16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", B16>;
+defm SUST_B_2D_V2I32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", B32>;
+defm SUST_B_2D_V2I64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", B64>;
-defm SUST_B_2D_V2I8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
-defm SUST_B_2D_V2I16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
-defm SUST_B_2D_V2I32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
-defm SUST_B_2D_V2I64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
+defm SUST_B_2D_V2I8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", B16>;
+defm SUST_B_2D_V2I16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", B16>;
+defm SUST_B_2D_V2I32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", B32>;
+defm SUST_B_2D_V2I64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", B64>;
-defm SUST_B_2D_V2I8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
-defm SUST_B_2D_V2I16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
-defm SUST_B_2D_V2I32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
-defm SUST_B_2D_V2I64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
+defm SUST_B_2D_V2I8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", B16>;
+defm SUST_B_2D_V2I16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", B16>;
+defm SUST_B_2D_V2I32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", B32>;
+defm SUST_B_2D_V2I64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", B64>;
-defm SUST_P_2D_V2I8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
-defm SUST_P_2D_V2I16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
-defm SUST_P_2D_V2I32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
+defm SUST_P_2D_V2I8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", B16>;
+defm SUST_P_2D_V2I16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", B16>;
+defm SUST_P_2D_V2I32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", B32>;
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ !con(surf, (ins B32:$x, B32:$y,
intype:$r, intype:$g, intype:$b, intype:$a)),
inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
pat>;
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ def _R : SUST_2D_V4_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, B32:$y,
intype:$r, intype:$g, intype:$b, intype:$a)]>;
def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_2D_V4I8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
-defm SUST_B_2D_V4I16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
-defm SUST_B_2D_V4I32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
+defm SUST_B_2D_V4I8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", B16>;
+defm SUST_B_2D_V4I16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", B16>;
+defm SUST_B_2D_V4I32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", B32>;
-defm SUST_B_2D_V4I8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
-defm SUST_B_2D_V4I16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
-defm SUST_B_2D_V4I32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
+defm SUST_B_2D_V4I8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", B16>;
+defm SUST_B_2D_V4I16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", B16>;
+defm SUST_B_2D_V4I32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", B32>;
-defm SUST_B_2D_V4I8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
-defm SUST_B_2D_V4I16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
-defm SUST_B_2D_V4I32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
+defm SUST_B_2D_V4I8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", B16>;
+defm SUST_B_2D_V4I16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", B16>;
+defm SUST_B_2D_V4I32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", B32>;
-defm SUST_P_2D_V4I8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
-defm SUST_P_2D_V4I16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
-defm SUST_P_2D_V4I32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
+defm SUST_P_2D_V4I8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", B16>;
+defm SUST_P_2D_V4I16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", B16>;
+defm SUST_P_2D_V4I32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", B32>;
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ !con(surf, (ins B32:$idx, B32:$x, B32:$y,
intype:$r)),
inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
pat>;
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ def _R : SUST_2D_ARRAY_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$idx, B32:$x, B32:$y,
intype:$r)]>;
def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_2D_ARRAY_I8_CLAMP
- : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
-defm SUST_B_2D_ARRAY_I16_CLAMP
- : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
-defm SUST_B_2D_ARRAY_I32_CLAMP
- : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
-defm SUST_B_2D_ARRAY_I64_CLAMP
- : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
-
-defm SUST_B_2D_ARRAY_I8_TRAP
- : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
-defm SUST_B_2D_ARRAY_I16_TRAP
- : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
-defm SUST_B_2D_ARRAY_I32_TRAP
- : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
-defm SUST_B_2D_ARRAY_I64_TRAP
- : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
-
-defm SUST_B_2D_ARRAY_I8_ZERO
- : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
-defm SUST_B_2D_ARRAY_I16_ZERO
- : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
-defm SUST_B_2D_ARRAY_I32_ZERO
- : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
-defm SUST_B_2D_ARRAY_I64_ZERO
- : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
-
-defm SUST_P_2D_ARRAY_I8_TRAP
- : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
-defm SUST_P_2D_ARRAY_I16_TRAP
- : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
-defm SUST_P_2D_ARRAY_I32_TRAP
- : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_I8_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", B16>;
+defm SUST_B_2D_ARRAY_I16_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", B16>;
+defm SUST_B_2D_ARRAY_I32_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", B32>;
+defm SUST_B_2D_ARRAY_I64_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", B64>;
+
+defm SUST_B_2D_ARRAY_I8_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", B16>;
+defm SUST_B_2D_ARRAY_I16_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", B16>;
+defm SUST_B_2D_ARRAY_I32_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", B32>;
+defm SUST_B_2D_ARRAY_I64_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", B64>;
+
+defm SUST_B_2D_ARRAY_I8_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", B16>;
+defm SUST_B_2D_ARRAY_I16_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", B16>;
+defm SUST_B_2D_ARRAY_I32_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", B32>;
+defm SUST_B_2D_ARRAY_I64_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", B64>;
+
+defm SUST_P_2D_ARRAY_I8_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", B16>;
+defm SUST_P_2D_ARRAY_I16_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", B16>;
+defm SUST_P_2D_ARRAY_I32_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", B32>;
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ !con(surf, (ins B32:$idx, B32:$x, B32:$y,
intype:$r, intype:$g)),
inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
pat>;
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$idx, B32:$x, B32:$y,
intype:$r, intype:$g)]>;
def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_2D_ARRAY_V2I8_CLAMP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
-defm SUST_B_2D_ARRAY_V2I16_CLAMP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
-defm SUST_B_2D_ARRAY_V2I32_CLAMP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
-defm SUST_B_2D_ARRAY_V2I64_CLAMP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
-
-defm SUST_B_2D_ARRAY_V2I8_TRAP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
-defm SUST_B_2D_ARRAY_V2I16_TRAP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
-defm SUST_B_2D_ARRAY_V2I32_TRAP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
-defm SUST_B_2D_ARRAY_V2I64_TRAP
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
-
-defm SUST_B_2D_ARRAY_V2I8_ZERO
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
-defm SUST_B_2D_ARRAY_V2I16_ZERO
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
-defm SUST_B_2D_ARRAY_V2I32_ZERO
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
-defm SUST_B_2D_ARRAY_V2I64_ZERO
- : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
-
-defm SUST_P_2D_ARRAY_V2I8_TRAP
- : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
-defm SUST_P_2D_ARRAY_V2I16_TRAP
- : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
-defm SUST_P_2D_ARRAY_V2I32_TRAP
- : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2I8_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", B16>;
+defm SUST_B_2D_ARRAY_V2I16_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", B16>;
+defm SUST_B_2D_ARRAY_V2I32_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", B32>;
+defm SUST_B_2D_ARRAY_V2I64_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", B64>;
+
+defm SUST_B_2D_ARRAY_V2I8_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", B16>;
+defm SUST_B_2D_ARRAY_V2I16_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", B16>;
+defm SUST_B_2D_ARRAY_V2I32_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", B32>;
+defm SUST_B_2D_ARRAY_V2I64_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", B64>;
+
+defm SUST_B_2D_ARRAY_V2I8_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", B16>;
+defm SUST_B_2D_ARRAY_V2I16_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", B16>;
+defm SUST_B_2D_ARRAY_V2I32_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", B32>;
+defm SUST_B_2D_ARRAY_V2I64_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", B64>;
+
+defm SUST_P_2D_ARRAY_V2I8_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", B16>;
+defm SUST_P_2D_ARRAY_V2I16_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", B16>;
+defm SUST_P_2D_ARRAY_V2I32_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", B32>;
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ !con(surf, (ins B32:$idx, B32:$x, B32:$y,
intype:$r, intype:$g, intype:$b, intype:$a)),
inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
pat>;
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$idx, B32:$x, B32:$y,
intype:$r, intype:$g, intype:$b, intype:$a)]>;
def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_2D_ARRAY_V4I8_CLAMP
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
-defm SUST_B_2D_ARRAY_V4I16_CLAMP
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
-defm SUST_B_2D_ARRAY_V4I32_CLAMP
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
-
-defm SUST_B_2D_ARRAY_V4I8_TRAP
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
-defm SUST_B_2D_ARRAY_V4I16_TRAP
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
-defm SUST_B_2D_ARRAY_V4I32_TRAP
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
-
-defm SUST_B_2D_ARRAY_V4I8_ZERO
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
-defm SUST_B_2D_ARRAY_V4I16_ZERO
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
-defm SUST_B_2D_ARRAY_V4I32_ZERO
- : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
-
-defm SUST_P_2D_ARRAY_V4I8_TRAP
- : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
-defm SUST_P_2D_ARRAY_V4I16_TRAP
- : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
-defm SUST_P_2D_ARRAY_V4I32_TRAP
- : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_V4I8_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", B16>;
+defm SUST_B_2D_ARRAY_V4I16_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", B16>;
+defm SUST_B_2D_ARRAY_V4I32_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", B32>;
+
+defm SUST_B_2D_ARRAY_V4I8_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", B16>;
+defm SUST_B_2D_ARRAY_V4I16_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", B16>;
+defm SUST_B_2D_ARRAY_V4I32_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", B32>;
+
+defm SUST_B_2D_ARRAY_V4I8_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", B16>;
+defm SUST_B_2D_ARRAY_V4I16_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", B16>;
+defm SUST_B_2D_ARRAY_V4I32_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", B32>;
+
+defm SUST_P_2D_ARRAY_V4I8_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", B16>;
+defm SUST_P_2D_ARRAY_V4I16_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", B16>;
+defm SUST_P_2D_ARRAY_V4I32_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", B32>;
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ !con(surf, (ins B32:$x, B32:$y, B32:$z,
intype:$r)),
inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
pat>;
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ def _R : SUST_3D_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, B32:$y, B32:$z,
intype:$r)]>;
def _I : SUST_3D_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_3D_I8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
-defm SUST_B_3D_I16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
-defm SUST_B_3D_I32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
-defm SUST_B_3D_I64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
+defm SUST_B_3D_I8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", B16>;
+defm SUST_B_3D_I16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", B16>;
+defm SUST_B_3D_I32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", B32>;
+defm SUST_B_3D_I64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", B64>;
-defm SUST_B_3D_I8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
-defm SUST_B_3D_I16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
-defm SUST_B_3D_I32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
-defm SUST_B_3D_I64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
+defm SUST_B_3D_I8_TRAP : SUST_3D<"sust.b.3d.b8.trap", B16>;
+defm SUST_B_3D_I16_TRAP : SUST_3D<"sust.b.3d.b16.trap", B16>;
+defm SUST_B_3D_I32_TRAP : SUST_3D<"sust.b.3d.b32.trap", B32>;
+defm SUST_B_3D_I64_TRAP : SUST_3D<"sust.b.3d.b64.trap", B64>;
-defm SUST_B_3D_I8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
-defm SUST_B_3D_I16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
-defm SUST_B_3D_I32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
-defm SUST_B_3D_I64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
+defm SUST_B_3D_I8_ZERO : SUST_3D<"sust.b.3d.b8.zero", B16>;
+defm SUST_B_3D_I16_ZERO : SUST_3D<"sust.b.3d.b16.zero", B16>;
+defm SUST_B_3D_I32_ZERO : SUST_3D<"sust.b.3d.b32.zero", B32>;
+defm SUST_B_3D_I64_ZERO : SUST_3D<"sust.b.3d.b64.zero", B64>;
-defm SUST_P_3D_I8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
-defm SUST_P_3D_I16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
-defm SUST_P_3D_I32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
+defm SUST_P_3D_I8_TRAP : SUST_3D<"sust.p.3d.b8.trap", B16>;
+defm SUST_P_3D_I16_TRAP : SUST_3D<"sust.p.3d.b16.trap", B16>;
+defm SUST_P_3D_I32_TRAP : SUST_3D<"sust.p.3d.b32.trap", B32>;
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ !con(surf, (ins B32:$x, B32:$y, B32:$z,
intype:$r, intype:$g)),
inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
pat>;
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ def _R : SUST_3D_V2_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, B32:$y, B32:$z,
intype:$r, intype:$g)]>;
def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_3D_V2I8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
-defm SUST_B_3D_V2I16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
-defm SUST_B_3D_V2I32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
-defm SUST_B_3D_V2I64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
+defm SUST_B_3D_V2I8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", B16>;
+defm SUST_B_3D_V2I16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", B16>;
+defm SUST_B_3D_V2I32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", B32>;
+defm SUST_B_3D_V2I64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", B64>;
-defm SUST_B_3D_V2I8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
-defm SUST_B_3D_V2I16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
-defm SUST_B_3D_V2I32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
-defm SUST_B_3D_V2I64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
+defm SUST_B_3D_V2I8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", B16>;
+defm SUST_B_3D_V2I16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", B16>;
+defm SUST_B_3D_V2I32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", B32>;
+defm SUST_B_3D_V2I64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", B64>;
-defm SUST_B_3D_V2I8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
-defm SUST_B_3D_V2I16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
-defm SUST_B_3D_V2I32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
-defm SUST_B_3D_V2I64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
+defm SUST_B_3D_V2I8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", B16>;
+defm SUST_B_3D_V2I16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", B16>;
+defm SUST_B_3D_V2I32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", B32>;
+defm SUST_B_3D_V2I64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", B64>;
-defm SUST_P_3D_V2I8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
-defm SUST_P_3D_V2I16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
-defm SUST_P_3D_V2I32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
+defm SUST_P_3D_V2I8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", B16>;
+defm SUST_P_3D_V2I16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", B16>;
+defm SUST_P_3D_V2I32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", B32>;
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf, list<dag> pat>
: NVPTXInst<(outs),
- !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ !con(surf, (ins B32:$x, B32:$y, B32:$z,
intype:$r, intype:$g, intype:$b, intype:$a)),
inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
pat>;
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));
- def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s),
- [(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ def _R : SUST_3D_V4_base<inst, intype, (ins B64:$s),
+ [(intr B64:$s, B32:$x, B32:$y, B32:$z,
intype:$r, intype:$g, intype:$b, intype:$a)]>;
def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s), []>;
}
-defm SUST_B_3D_V4I8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
-defm SUST_B_3D_V4I16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
-defm SUST_B_3D_V4I32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V4I8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", B16>;
+defm SUST_B_3D_V4I16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", B16>;
+defm SUST_B_3D_V4I32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", B32>;
-defm SUST_B_3D_V4I8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
-defm SUST_B_3D_V4I16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
-defm SUST_B_3D_V4I32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
+defm SUST_B_3D_V4I8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", B16>;
+defm SUST_B_3D_V4I16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", B16>;
+defm SUST_B_3D_V4I32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", B32>;
-defm SUST_B_3D_V4I8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
-defm SUST_B_3D_V4I16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
-defm SUST_B_3D_V4I32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
+defm SUST_B_3D_V4I8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", B16>;
+defm SUST_B_3D_V4I16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", B16>;
+defm SUST_B_3D_V4I32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", B32>;
-defm SUST_P_3D_V4I8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
-defm SUST_P_3D_V4I16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
-defm SUST_P_3D_V4I32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
+defm SUST_P_3D_V4I8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", B16>;
+defm SUST_P_3D_V4I16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", B16>;
+defm SUST_P_3D_V4I32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", B32>;
}
@@ -5027,13 +4363,13 @@ defm SUST_P_3D_V4I32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
//-----------------------------------
class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
- : NVPTXInst<(outs Int64Regs:$d), (ins),
+ : NVPTXInst<(outs B64:$d), (ins),
"mov.u64 \t$d, %" # regname # ";",
[(set i64:$d, (intop))]>,
Requires<Preds>;
class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
- : NVPTXInst<(outs Int32Regs:$d), (ins),
+ : NVPTXInst<(outs B32:$d), (ins),
"mov.u32 \t$d, %" # regname # ";",
[(set i32:$d, (intop))]>,
Requires<Preds>;
@@ -5072,18 +4408,12 @@ def INT_PTX_SREG_CLUSTER_NCTARANK:
[hasSM<90>, hasPTX<78>]>;
-def INT_PTX_SREG_LANEID :
- PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
-def INT_PTX_SREG_WARPID :
- PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
-def INT_PTX_SREG_NWARPID :
- PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
-def INT_PTX_SREG_SMID :
- PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
-def INT_PTX_SREG_NSMID :
- PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
-def INT_PTX_SREG_GRIDID :
- PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
+def SREG_LANEID : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
+def SREG_WARPID : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
+def SREG_NWARPID : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
+def SREG_SMID : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
+def SREG_NSMID : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
+def SREG_GRIDID : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
def INT_PTX_SREG_LANEMASK_EQ :
PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
@@ -5097,16 +4427,13 @@ def INT_PTX_SREG_LANEMASK_GT :
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
let hasSideEffects = 1 in {
-def INT_PTX_SREG_CLOCK :
- PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
-def INT_PTX_SREG_CLOCK64 :
- PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
-def INT_PTX_SREG_GLOBALTIMER :
- PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
+def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
+def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
}
-def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>;
-def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;
+def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>;
+def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>;
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
@@ -5116,7 +4443,7 @@ def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
// handle the constant.
def INT_PTX_SREG_WARPSIZE :
- NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
+ NVPTXInst<(outs B32:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
[(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
@@ -5127,21 +4454,21 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
: WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
// NVPTX register types used to carry fragment data.
NVPTXRegClass regclass = !cond(
- !eq(ptx_elt_type, "f16") : Int32Regs,
- !eq(ptx_elt_type, "f32") : Float32Regs,
- !eq(ptx_elt_type, "f64") : Float64Regs,
- !eq(ptx_elt_type, "bf16") : Int32Regs,
- !eq(ptx_elt_type, "tf32") : Int32Regs,
- !eq(ptx_elt_type, "s32") : Int32Regs,
- !eq(ptx_elt_type, "b16") : Int32Regs,
- !eq(ptx_elt_type, "b8") : Int32Regs,
- !eq(ptx_elt_type, "b8x16.b6x16_p32") : Int32Regs,
- !eq(ptx_elt_type, "b8x16.b4x16_p64") : Int32Regs,
- !eq(ptx_elt_type, "s8") : Int32Regs,
- !eq(ptx_elt_type, "u8") : Int32Regs,
- !eq(ptx_elt_type, "s4") : Int32Regs,
- !eq(ptx_elt_type, "u4") : Int32Regs,
- !eq(ptx_elt_type, "b1") : Int32Regs);
+ !eq(ptx_elt_type, "f16") : B32,
+ !eq(ptx_elt_type, "f32") : B32,
+ !eq(ptx_elt_type, "f64") : B64,
+ !eq(ptx_elt_type, "bf16") : B32,
+ !eq(ptx_elt_type, "tf32") : B32,
+ !eq(ptx_elt_type, "s32") : B32,
+ !eq(ptx_elt_type, "b16") : B32,
+ !eq(ptx_elt_type, "b8") : B32,
+ !eq(ptx_elt_type, "b8x16.b6x16_p32") : B32,
+ !eq(ptx_elt_type, "b8x16.b4x16_p64") : B32,
+ !eq(ptx_elt_type, "s8") : B32,
+ !eq(ptx_elt_type, "u8") : B32,
+ !eq(ptx_elt_type, "s4") : B32,
+ !eq(ptx_elt_type, "u4") : B32,
+ !eq(ptx_elt_type, "b1") : B32);
// Instruction input/output arguments for the fragment.
list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
@@ -5284,7 +4611,7 @@ class WMMA_INSTR<string _Intr, list<dag> _Args>
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride>
: WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
[!con((ins ADDR:$src),
- !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
+ !if(WithStride, (ins B32:$ldm), (ins)))]>,
Requires<Frag.Predicates> {
// Load/store intrinsics are overloaded on pointer's address space.
// To match the right intrinsic, we need to build AS-constrained PatFrag.
@@ -5324,7 +4651,7 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
: WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
[!con((ins ADDR:$dst),
Frag.Ins,
- !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
+ !if(WithStride, (ins B32:$ldm), (ins)))]>,
Requires<Frag.Predicates> {
// Load/store intrinsics are overloaded on pointer's address space.
@@ -5539,19 +4866,19 @@ foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
def : MMA_PAT<mma>;
multiclass MAPA<string suffix, Intrinsic Intr> {
- def _32: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
+ def _32: BasicNVPTXInst<(outs B32:$d), (ins B32:$a, B32:$b),
"mapa" # suffix # ".u32",
[(set i32:$d, (Intr i32:$a, i32:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
- def _32i: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
+ def _32i: BasicNVPTXInst<(outs B32:$d), (ins B32:$a, i32imm:$b),
"mapa" # suffix # ".u32",
[(set i32:$d, (Intr i32:$a, imm:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
- def _64: BasicNVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
+ def _64: BasicNVPTXInst<(outs B64:$d), (ins B64:$a, B32:$b),
"mapa" # suffix # ".u64",
[(set i64:$d, (Intr i64:$a, i32:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
- def _64i: BasicNVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
+ def _64i: BasicNVPTXInst<(outs B64:$d), (ins B64:$a, i32imm:$b),
"mapa" # suffix # ".u64",
[(set i64:$d, (Intr i64:$a, imm:$b))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
@@ -5562,11 +4889,11 @@ defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluste
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
- def _32: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+ def _32: BasicNVPTXInst<(outs B32:$d), (ins B32:$a),
"getctarank" # suffix # ".u32",
[(set i32:$d, (Intr i32:$a))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
- def _64: BasicNVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ def _64: BasicNVPTXInst<(outs B32:$d), (ins B64:$a),
"getctarank" # suffix # ".u64",
[(set i32:$d, (Intr i64:$a))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
@@ -5575,7 +4902,7 @@ multiclass GETCTARANK<string suffix, Intrinsic Intr> {
defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
-def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
+def is_explicit_cluster: NVPTXInst<(outs B1:$d), (ins),
"mov.pred\t$d, %is_explicit_cluster;",
[(set i1:$d, (int_nvvm_is_explicit_cluster))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
@@ -5627,9 +4954,9 @@ let isConvergent = true in {
multiclass TCGEN05_ALLOC_INTR<string AS, string num, Intrinsic Intr> {
def "" : BasicNVPTXInst<(outs),
- (ins ADDR:$dst, Int32Regs:$ncols),
+ (ins ADDR:$dst, B32:$ncols),
"tcgen05.alloc.cta_group::" # num # ".sync.aligned" # AS # ".b32",
- [(Intr addr:$dst, Int32Regs:$ncols)]>,
+ [(Intr addr:$dst, B32:$ncols)]>,
Requires<[hasTcgen05Instructions]>;
}
@@ -5641,9 +4968,9 @@ defm TCGEN05_ALLOC_S64_CG2 : TCGEN05_ALLOC_INTR<".shared::cta", "2", int_nvvm_tc
multiclass TCGEN05_DEALLOC_INTR<string num, Intrinsic Intr> {
def "" : BasicNVPTXInst<(outs),
- (ins Int32Regs:$tmem_addr, Int32Regs:$ncols),
+ (ins B32:$tmem_addr, B32:$ncols),
"tcgen05.dealloc.cta_group::" # num # ".sync.aligned.b32",
- [(Intr Int32Regs:$tmem_addr, Int32Regs:$ncols)]>,
+ [(Intr B32:$tmem_addr, B32:$ncols)]>,
Requires<[hasTcgen05Instructions]>;
}
defm TCGEN05_DEALLOC_CG1: TCGEN05_DEALLOC_INTR<"1", int_nvvm_tcgen05_dealloc_cg1>;
@@ -5677,9 +5004,9 @@ multiclass TCGEN05_COMMIT_INTR<string AS, string num> {
prefix # ".b64",
[(Intr addr:$mbar)]>,
Requires<[hasTcgen05Instructions]>;
- def _MC : BasicNVPTXInst<(outs), (ins ADDR:$mbar, Int16Regs:$mc),
+ def _MC : BasicNVPTXInst<(outs), (ins ADDR:$mbar, B16:$mc),
prefix # ".multicast::cluster.b64",
- [(IntrMC addr:$mbar, Int16Regs:$mc)]>,
+ [(IntrMC addr:$mbar, B16:$mc)]>,
Requires<[hasTcgen05Instructions]>;
}
@@ -5711,14 +5038,14 @@ multiclass TCGEN05_CP_INTR<string shape, string src_fmt, string mc = ""> {
defvar IntrCG2 = !cast<Intrinsic>(intr_prefix # "_cg2");
def _cg1 : BasicNVPTXInst<(outs),
- (ins ADDR:$tmem_addr, Int64Regs:$sdesc),
+ (ins ADDR:$tmem_addr, B64:$sdesc),
"tcgen05.cp.cta_group::1." # shape_mc_asm # fmt_asm,
- [(IntrCG1 addr:$tmem_addr, Int64Regs:$sdesc)]>,
+ [(IntrCG1 addr:$tmem_addr, B64:$sdesc)]>,
Requires<[hasTcgen05Instructions]>;
def _cg2 : BasicNVPTXInst<(outs),
- (ins ADDR:$tmem_addr, Int64Regs:$sdesc),
+ (ins ADDR:$tmem_addr, B64:$sdesc),
"tcgen05.cp.cta_group::2." # shape_mc_asm # fmt_asm,
- [(IntrCG2 addr:$tmem_addr, Int64Regs:$sdesc)]>,
+ [(IntrCG2 addr:$tmem_addr, B64:$sdesc)]>,
Requires<[hasTcgen05Instructions]>;
}
@@ -5757,7 +5084,7 @@ class TCGEN05_LDST_INST_NAME<string Op, string shape, int lg2Count, bit packOrUn
// reginfo class tcgen05.{ld, st}
class TCGEN05_LDST_REGINFO<int Veclen> {
// create a list of types for load/store operands
- list<NVPTXRegClass> regs = !listsplat(Int32Regs, Veclen);
+ list<NVPTXRegClass> regs = !listsplat(B32, Veclen);
// generate list of regnames for load/store operands
list<string> reg_names = !foreach(x, !range(0, Veclen), "r" # x);
string regstring = "{{" # !interleave(!foreach(n, !range(0, Veclen), "$r" # n), ", ") # "}}";
@@ -5776,7 +5103,7 @@ class TCGEN05_LD_INST<string Shape, int Num, bit Pack> :
TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
- let InOperandList = !con((ins Int32Regs:$taddr),
+ let InOperandList = !con((ins B32:$taddr),
!if(!eq(Shape, "16x32bx2"), (ins i64imm:$offset), (ins)));
let OutOperandList = Info.Outs;
let AsmString = "tcgen05.ld.sync.aligned"
@@ -5801,7 +5128,7 @@ class TCGEN05_ST_INST<string Shape, int Num, bit Unpack> :
TCGEN05_LDST_REGINFO Info = TCGEN05_LDST_REGINFO<
NVVM_TCGEN05_LDST_ACCESS_SIZE<Shape, Num>.veclen>;
- let InOperandList = !con((ins Int32Regs:$taddr),
+ let InOperandList = !con((ins B32:$taddr),
!if(!eq(Shape, "16x32bx2"), (ins i64imm:$offset), (ins)),
Info.Ins);
let OutOperandList = (outs);
@@ -5836,13 +5163,13 @@ foreach shape = ["16x64b", "16x128b", "16x256b", "32x32b", "16x32bx2"] in {
def st_bulk_imm : TImmLeaf<i64, [{ return Imm == 0; }]>;
def INT_NVVM_ST_BULK_GENERIC :
- BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, Int64Regs:$size, i64imm:$value),
+ BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, B64:$size, i64imm:$value),
"st.bulk",
[(int_nvvm_st_bulk addr:$dest_addr, i64:$size, st_bulk_imm:$value)]>,
Requires<[hasSM<100>, hasPTX<86>]>;
def INT_NVVM_ST_BULK_SHARED_CTA:
- BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, Int64Regs:$size, i64imm:$value),
+ BasicNVPTXInst<(outs), (ins ADDR:$dest_addr, B64:$size, i64imm:$value),
"st.bulk.shared::cta",
[(int_nvvm_st_bulk_shared_cta addr:$dest_addr, i64:$size, st_bulk_imm:$value)]>,
Requires<[hasSM<100>, hasPTX<86>]>;
@@ -5870,7 +5197,7 @@ def clusterlaunchcontrol_query_cancel_is_canceled:
SDTClusterLaunchControlQueryCancelIsCanceled, []>;
def CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED:
- NVPTXInst<(outs Int1Regs:$pred), (ins Int64Regs:$try_cancel_response0, Int64Regs:$try_cancel_response1),
+ NVPTXInst<(outs B1:$pred), (ins B64:$try_cancel_response0, B64:$try_cancel_response1),
"{{\n\t" #
".reg .b128 %clc_handle;\n\t" #
"mov.b128 %clc_handle, {$try_cancel_response0, $try_cancel_response1};\n\t" #
@@ -5880,7 +5207,7 @@ def CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED:
Requires<[hasSM<100>, hasPTX<86>]>;
class CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID<string Dim>:
- NVPTXInst<(outs Int32Regs:$reg), (ins Int64Regs:$try_cancel_response0, Int64Regs:$try_cancel_response1),
+ NVPTXInst<(outs B32:$reg), (ins B64:$try_cancel_response0, B64:$try_cancel_response1),
"{{\n\t" #
".reg .b128 %clc_handle;\n\t" #
"mov.b128 %clc_handle, {$try_cancel_response0, $try_cancel_response1};\n\t" #
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index eb60e1502cf90..aa07d510b3a12 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -25,9 +25,9 @@ using namespace llvm;
namespace llvm {
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
- if (RC == &NVPTX::Int128RegsRegClass)
+ if (RC == &NVPTX::B128RegClass)
return ".b128";
- if (RC == &NVPTX::Int64RegsRegClass)
+ if (RC == &NVPTX::B64RegClass)
// We use untyped (.b) integer registers here as NVCC does.
// Correctness of generated code does not depend on register type,
// but using .s/.u registers runs into ptxas bug that prevents
@@ -47,11 +47,11 @@ StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
// add.f16v2 rb32,rb32,rb32; // OK
// add.f16v2 rs32,rs32,rs32; // OK
return ".b64";
- if (RC == &NVPTX::Int32RegsRegClass)
+ if (RC == &NVPTX::B32RegClass)
return ".b32";
- if (RC == &NVPTX::Int16RegsRegClass)
+ if (RC == &NVPTX::B16RegClass)
return ".b16";
- if (RC == &NVPTX::Int1RegsRegClass)
+ if (RC == &NVPTX::B1RegClass)
return ".pred";
if (RC == &NVPTX::SpecialRegsRegClass)
return "!Special!";
@@ -59,15 +59,15 @@ StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
}
StringRef getNVPTXRegClassStr(TargetRegisterClass const *RC) {
- if (RC == &NVPTX::Int128RegsRegClass)
+ if (RC == &NVPTX::B128RegClass)
return "%rq";
- if (RC == &NVPTX::Int64RegsRegClass)
+ if (RC == &NVPTX::B64RegClass)
return "%rd";
- if (RC == &NVPTX::Int32RegsRegClass)
+ if (RC == &NVPTX::B32RegClass)
return "%r";
- if (RC == &NVPTX::Int16RegsRegClass)
+ if (RC == &NVPTX::B16RegClass)
return "%rs";
- if (RC == &NVPTX::Int1RegsRegClass)
+ if (RC == &NVPTX::B1RegClass)
return "%p";
if (RC == &NVPTX::SpecialRegsRegClass)
return "!Special!";
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 2eea9e9721cdf..9fac97d97c609 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -55,23 +55,15 @@ foreach i = 0...31 in {
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
-def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
-def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32,
+def B1 : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
+def B16 : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
+def B32 : NVPTXRegClass<[i32, v2f16, v2bf16, v2i16, v4i8, f32], 32,
(add (sequence "R%u", 0, 4),
VRFrame32, VRFrameLocal32)>;
-def Int64Regs : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
+def B64 : NVPTXRegClass<[i64, f64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
// 128-bit regs are not defined as general regs in NVPTX. They are used for inlineASM only.
-def Int128Regs : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>;
-
-def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
-def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 4))>;
-def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
-def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
+def B128 : NVPTXRegClass<[i128], 128, (add (sequence "RQ%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame32, VRFrameLocal32, VRDepot,
(sequence "ENVREG%u", 0, 31))>;
-
-defvar Float32Regs = Int32Regs;
-defvar Float64Regs = Int64Regs;
diff --git a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
index ef8394005943c..08b89059f80bd 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
@@ -12,8 +12,8 @@
---
name: test
registers:
- - { id: 0, class: int32regs }
- - { id: 1, class: int32regs }
+ - { id: 0, class: b32 }
+ - { id: 1, class: b32 }
body: |
bb.0.entry:
%0 = LD_i32 0, 4, 1, 2, 32, &test_param_0, 0
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index 146a45a9b1c20..bb36b1df115d1 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -30,24 +30,24 @@
---
name: test
registers:
- - { id: 0, class: int32regs }
- - { id: 1, class: int64regs }
- - { id: 2, class: int32regs }
- - { id: 3, class: int64regs }
- - { id: 4, class: int32regs }
- - { id: 5, class: int32regs }
- - { id: 6, class: int32regs }
- - { id: 7, class: int32regs }
+ - { id: 0, class: b32 }
+ - { id: 1, class: b64 }
+ - { id: 2, class: b32 }
+ - { id: 3, class: b64 }
+ - { id: 4, class: b32 }
+ - { id: 5, class: b32 }
+ - { id: 6, class: b32 }
+ - { id: 7, class: b32 }
body: |
bb.0.entry:
%0 = LD_i32 0, 0, 4, 2, 32, &test_param_0, 0
%1 = CVT_f64_f32 %0, 0
%2 = LD_i32 0, 0, 4, 0, 32, &test_param_1, 0
- ; CHECK: %3:int64regs = FADD_rnf64ri %1, double 3.250000e+00
+ ; CHECK: %3:b64 = FADD_rnf64ri %1, double 3.250000e+00
%3 = FADD_rnf64ri %1, double 3.250000e+00
%4 = CVT_f32_f64 %3, 5
%5 = CVT_f32_s32 %2, 5
- ; CHECK: %6:int32regs = FADD_rnf32ri %5, float 6.250000e+00
+ ; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00
%6 = FADD_rnf32ri %5, float 6.250000e+00
%7 = FMUL_rnf32rr %6, %4
StoreRetvalI32 %7, 0
@@ -56,24 +56,24 @@ body: |
---
name: test2
registers:
- - { id: 0, class: int32regs }
- - { id: 1, class: int64regs }
- - { id: 2, class: int32regs }
- - { id: 3, class: int64regs }
- - { id: 4, class: int32regs }
- - { id: 5, class: int32regs }
- - { id: 6, class: int32regs }
- - { id: 7, class: int32regs }
+ - { id: 0, class: b32 }
+ - { id: 1, class: b64 }
+ - { id: 2, class: b32 }
+ - { id: 3, class: b64 }
+ - { id: 4, class: b32 }
+ - { id: 5, class: b32 }
+ - { id: 6, class: b32 }
+ - { id: 7, class: b32 }
body: |
bb.0.entry:
%0 = LD_i32 0, 0, 4, 2, 32, &test2_param_0, 0
%1 = CVT_f64_f32 %0, 0
%2 = LD_i32 0, 0, 4, 0, 32, &test2_param_1, 0
- ; CHECK: %3:int64regs = FADD_rnf64ri %1, double 0x7FF8000000000000
+ ; CHECK: %3:b64 = FADD_rnf64ri %1, double 0x7FF8000000000000
%3 = FADD_rnf64ri %1, double 0x7FF8000000000000
%4 = CVT_f32_f64 %3, 5
%5 = CVT_f32_s32 %2, 5
- ; CHECK: %6:int32regs = FADD_rnf32ri %5, float 0x7FF8000000000000
+ ; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000
%6 = FADD_rnf32ri %5, float 0x7FF8000000000000
%7 = FMUL_rnf32rr %6, %4
StoreRetvalI32 %7, 0
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
index c5bed1244d50e..71108f8b37175 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
@@ -12,8 +12,8 @@
---
name: test
registers:
- - { id: 0, class: int32regs }
- - { id: 1, class: int32regs }
+ - { id: 0, class: b32 }
+ - { id: 1, class: b32 }
body: |
bb.0.entry:
%0 = LD_i32 0, 4, 1, 2, 32, &test_param_0, 0
diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir
index b09d889815db7..4d80d52de8da8 100644
--- a/llvm/test/CodeGen/NVPTX/branch-fold.mir
+++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir
@@ -33,12 +33,12 @@ name: hoge
alignment: 1
tracksRegLiveness: true
registers:
- - { id: 0, class: int64regs }
- - { id: 1, class: int64regs }
- - { id: 2, class: int1regs }
- - { id: 3, class: int64regs }
- - { id: 4, class: int1regs }
- - { id: 5, class: int64regs }
+ - { id: 0, class: b64 }
+ - { id: 1, class: b64 }
+ - { id: 2, class: b1 }
+ - { id: 3, class: b64 }
+ - { id: 4, class: b1 }
+ - { id: 5, class: b64 }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -47,18 +47,18 @@ body: |
; CHECK: bb.0.bb:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: CBranch undef %2:int1regs, %bb.3
+ ; CHECK-NEXT: CBranch undef %2:b1, %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.bb1.preheader:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:int64regs = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:b64 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb1:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:int64regs = ADDi64ri [[ADDi64ri]], 1
- ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:int1regs = SETP_s64ri [[ADDi64ri]], 1, 2
+ ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:b64 = ADDi64ri [[ADDi64ri]], 1
+ ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_s64ri [[ADDi64ri]], 1, 2
; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb4:
@@ -68,16 +68,16 @@ body: |
bb.0.bb:
successors: %bb.1, %bb.3
- CBranch undef %2:int1regs, %bb.3
+ CBranch undef %2:b1, %bb.3
bb.1.bb1.preheader:
- %5:int64regs = IMPLICIT_DEF
+ %5:b64 = IMPLICIT_DEF
bb.2.bb1:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
- %5:int64regs = ADDi64ri %5, 1
- %4:int1regs = SETP_s64ri %5, 1, 2
+ %5:b64 = ADDi64ri %5, 1
+ %4:b1 = SETP_s64ri %5, 1, 2
CBranch %4, %bb.2
bb.3.bb4:
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
index c2c87b6b24285..a1d8d0590f160 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
@@ -33,18 +33,18 @@ debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
registers:
- - { id: 0, class: int32regs, preferred-register: '' }
- - { id: 1, class: int32regs, preferred-register: '' }
- - { id: 2, class: int32regs, preferred-register: '' }
- - { id: 3, class: int32regs, preferred-register: '' }
- - { id: 4, class: int32regs, preferred-register: '' }
- - { id: 5, class: int32regs, preferred-register: '' }
- - { id: 6, class: int32regs, preferred-register: '' }
- - { id: 7, class: int32regs, preferred-register: '' }
- - { id: 8, class: int32regs, preferred-register: '' }
- - { id: 9, class: int32regs, preferred-register: '' }
- - { id: 10, class: int32regs, preferred-register: '' }
- - { id: 11, class: int32regs, preferred-register: '' }
+ - { id: 0, class: b32, preferred-register: '' }
+ - { id: 1, class: b32, preferred-register: '' }
+ - { id: 2, class: b32, preferred-register: '' }
+ - { id: 3, class: b32, preferred-register: '' }
+ - { id: 4, class: b32, preferred-register: '' }
+ - { id: 5, class: b32, preferred-register: '' }
+ - { id: 6, class: b32, preferred-register: '' }
+ - { id: 7, class: b32, preferred-register: '' }
+ - { id: 8, class: b32, preferred-register: '' }
+ - { id: 9, class: b32, preferred-register: '' }
+ - { id: 10, class: b32, preferred-register: '' }
+ - { id: 11, class: b32, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
@@ -77,20 +77,20 @@ constants: []
machineFunctionInfo: {}
body: |
bb.0:
- %0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
+ %0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0
; CHECK-NOT: ProxyReg
- %4:int32regs = ProxyRegB32 killed %0
- %5:int32regs = ProxyRegB32 killed %1
- %6:int32regs = ProxyRegB32 killed %2
- %7:int32regs = ProxyRegB32 killed %3
+ %4:b32 = ProxyRegB32 killed %0
+ %5:b32 = ProxyRegB32 killed %1
+ %6:b32 = ProxyRegB32 killed %2
+ %7:b32 = ProxyRegB32 killed %3
; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3
StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
- %8:int32regs = LoadParamMemI32 0
+ %8:b32 = LoadParamMemI32 0
; CHECK-NOT: ProxyReg
- %9:int32regs = ProxyRegB32 killed %8
- %10:int32regs = ProxyRegB32 killed %9
- %11:int32regs = ProxyRegB32 killed %10
+ %9:b32 = ProxyRegB32 killed %8
+ %10:b32 = ProxyRegB32 killed %9
+ %11:b32 = ProxyRegB32 killed %10
; CHECK: StoreRetvalI32 killed %8
StoreRetvalI32 killed %11, 0
Return
diff --git a/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll b/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll
index 7a42268650c63..6055a49c98526 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-bool-var.ll
@@ -13,7 +13,7 @@ entry:
; }
;
; CHECK-LABEL: Machine code for function test1
- ; CHECK: DBG_VALUE %[[#]]:int32regs, $noreg, !"xyz", !DIExpression(), debug-location ![[#]]; test.cu:2 line no:6
+ ; CHECK: DBG_VALUE %[[#]]:b32, $noreg, !"xyz", !DIExpression(), debug-location ![[#]]; test.cu:2 line no:6
;
%cmp = icmp eq i32 %gid, 0, !dbg !12
%conv = zext i1 %cmp to i32, !dbg !12
@@ -35,7 +35,7 @@ entry:
; }
;
; CHECK-LABEL: Machine code for function test2
- ; CHECK: DBG_VALUE %[[#]]:int32regs, $noreg, !"abc", !DIExpression(), debug-location ![[#]]; test.cu:12 line no:11
+ ; CHECK: DBG_VALUE %[[#]]:b32, $noreg, !"abc", !DIExpression(), debug-location ![[#]]; test.cu:12 line no:11
;
%cmp = icmp eq i32 %gid, 0, !dbg !17
%conv = zext i1 %cmp to i32, !dbg !17