[llvm] e846148 - [LLVM][NVPTX] Add support for div.full instruction (#116482)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 15:27:46 PST 2024
Author: Pradeep Kumar
Date: 2024-11-27T04:57:42+05:30
New Revision: e84614833e52ce9a7bebfa8d0d1af3298b6977b9
URL: https://github.com/llvm/llvm-project/commit/e84614833e52ce9a7bebfa8d0d1af3298b6977b9
DIFF: https://github.com/llvm/llvm-project/commit/e84614833e52ce9a7bebfa8d0d1af3298b6977b9.diff
LOG: [LLVM][NVPTX] Add support for div.full instruction (#116482)
This commit adds NVPTX support for the div.full PTX instruction, along with a
test in llvm/test/CodeGen/NVPTX/div.ll. [For more information, see the PTX
ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#floating-point-instructions-div)
Added:
llvm/test/CodeGen/NVPTX/div.ll
Modified:
llvm/include/llvm/IR/IntrinsicsNVVM.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 115fcee0b04f22..8802ca2534355c 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -820,6 +820,13 @@ let TargetPrefix = "nvvm" in {
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
+ def int_nvvm_div_full : ClangBuiltin<"__nvvm_div_full">,
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
+ def int_nvvm_div_full_ftz : ClangBuiltin<"__nvvm_div_full_ftz">,
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
+
//
// Sad
//
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 5878940812f62b..5528e7b9fe0dda 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1096,6 +1096,18 @@ def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
+def : Pat<(int_nvvm_div_full Float32Regs:$a, Float32Regs:$b),
+ (FDIV32rr Float32Regs:$a, Float32Regs:$b)>;
+
+def : Pat<(int_nvvm_div_full Float32Regs:$a, fpimm:$b),
+ (FDIV32ri Float32Regs:$a, f32imm:$b)>;
+
+def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, Float32Regs:$b),
+ (FDIV32rr_ftz Float32Regs:$a, Float32Regs:$b)>;
+
+def : Pat<(int_nvvm_div_full_ftz Float32Regs:$a, fpimm:$b),
+ (FDIV32ri_ftz Float32Regs:$a, f32imm:$b)>;
+
//
// Sad
//
diff --git a/llvm/test/CodeGen/NVPTX/div.ll b/llvm/test/CodeGen/NVPTX/div.ll
new file mode 100644
index 00000000000000..1df30103849178
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/div.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 | %ptxas-verify %}
+
+define float @div_full(float %a, float %b) {
+; CHECK-LABEL: div_full(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<9>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [div_full_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [div_full_param_1];
+; CHECK-NEXT: div.full.f32 %f3, %f1, %f2;
+; CHECK-NEXT: mov.f32 %f4, 0f40400000;
+; CHECK-NEXT: div.full.f32 %f5, %f3, %f4;
+; CHECK-NEXT: div.full.ftz.f32 %f6, %f5, %f2;
+; CHECK-NEXT: mov.f32 %f7, 0f40800000;
+; CHECK-NEXT: div.full.ftz.f32 %f8, %f6, %f7;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f8;
+; CHECK-NEXT: ret;
+ %1 = call float @llvm.nvvm.div.full(float %a, float %b)
+ %2 = call float @llvm.nvvm.div.full(float %1, float 3.0)
+ %3 = call float @llvm.nvvm.div.full.ftz(float %2, float %b)
+ %4 = call float @llvm.nvvm.div.full.ftz(float %3, float 4.0)
+ ret float %4
+}
More information about the llvm-commits mailing list