[PATCH] D144911: adding bf16 support to NVPTX
Kushan Ahmadian via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 5 09:18:26 PDT 2023
kushanam updated this revision to Diff 528475.
kushanam added a comment.
Rebased the D144911 <https://reviews.llvm.org/D144911> patch.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D144911/new/
https://reviews.llvm.org/D144911
Files:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Index: llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -998,17 +998,17 @@
FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
[hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX70, hasSM80]>,
+ FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, BFloat16Regs, [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, BFloat16Regs,
- [hasPTX70, hasSM80]>,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, BFloat16Regs,
- [hasPTX70, hasSM80]>,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, BFloat16Regs,
- [hasPTX70, hasSM80]>,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, BFloat16Regs,
- [hasPTX70, hasSM80]>,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, BFloat16Regs,
- [hasPTX70, hasSM80]>,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
[hasPTX<42>, hasSM<53>]>,
@@ -1022,10 +1022,10 @@
[hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
Float16x2Regs, [hasPTX<70>, hasSM<80>]>,
- FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
- [hasPTX70, hasSM80]>,
- FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
- [hasPTX70, hasSM80]>
+ FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, BFloat16x2Regs,
+ [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, BFloat16x2Regs,
+ [hasPTX<70>, hasSM<80>]>
] in {
def P.Variant :
F_MATH_3<!strconcat("fma",
Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1121,7 +1121,7 @@
NVPTXInst<(outs RC:$dst), (ins RC:$src),
!strconcat(OpcStr, " \t$dst, $src;"),
[(set RC:$dst, (fneg (T RC:$src)))]>,
- Requires<[useFP16Math, hasPTX70, hasSM80, Pred]>;
+ Requires<[useFP16Math, hasPTX<70>, hasSM<80>, Pred]>;
def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, BFloat16Regs, doF32FTZ>;
def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, BFloat16Regs, True>;
def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, BFloat16x2Regs, doF32FTZ>;
@@ -3337,30 +3337,6 @@
" mov.b32 \t{%tmp_lo, $dst}, $src; }}",
[(set BFloat16Regs:$dst,
(extractelt (v2bf16 BFloat16x2Regs:$src), 1))]>;
-
- // // Coalesce two bf16 registers into bf16x2
- // def BuildBF16x2 : NVPTXInst<(outs BFloat16x2Regs:$dst),
- // (ins BFloat16Regs:$a, BFloat16Regs:$b),
- // "mov.b32 \t$dst, {{$a, $b}};",
- // [(set (v2bf16 BFloat16x2Regs:$dst),
- // (build_vector (bf16 BFloat16Regs:$a), (bf16 BFloat16Regs:$b)))]>;
-
- // // Directly initializing underlying the b32 register is one less SASS
- // // instruction than than vector-packing move.
- // def BuildBF16x2i : NVPTXInst<(outs BFloat16x2Regs:$dst), (ins i32imm:$src),
- // "mov.b32 \t$dst, $src;",
- // []>;
-
- // // Split f16x2 into two f16 registers.
- // def SplitBF16x2 : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi),
- // (ins BFloat16x2Regs:$src),
- // "mov.b32 \t{{$lo, $hi}}, $src;",
- // []>;
- // // Split an i32 into two f16
- // def SplitI32toBF16x2 : NVPTXInst<(outs BFloat16Regs:$lo, BFloat16Regs:$hi),
- // (ins Int32Regs:$src),
- // "mov.b32 \t{{$lo, $hi}}, $src;",
- // []>;
}
// Count leading zeros
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144911.528475.patch
Type: text/x-patch
Size: 4301 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20230605/abc58851/attachment-0001.bin>
More information about the cfe-commits
mailing list