[llvm] b0d2411 - [NVPTX] Support copysign PTX instruction (#107800)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 17:37:12 PDT 2024
Author: Alex MacLean
Date: 2024-09-09T17:37:09-07:00
New Revision: b0d2411b53a0b55baf6d6dc7986d285ce59807fa
URL: https://github.com/llvm/llvm-project/commit/b0d2411b53a0b55baf6d6dc7986d285ce59807fa
DIFF: https://github.com/llvm/llvm-project/commit/b0d2411b53a0b55baf6d6dc7986d285ce59807fa.diff
LOG: [NVPTX] Support copysign PTX instruction (#107800)
Lower `fcopysign` SDNodes into `copysign` PTX instructions where
possible. See
[PTX ISA: 9.7.3.2. Floating Point Instructions: copysign](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-copysign).
Added:
llvm/test/CodeGen/NVPTX/copysign.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/NVPTX/math-intrins.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 5c5766a8b23455..3816e099537199 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -838,8 +838,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
// These map to corresponding instructions for f32/f64. f16 must be
// promoted to f32. v2f16 is expanded to f16, which is then promoted
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 0c883093dd0a54..e8e8548120131e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -977,6 +977,20 @@ def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_fabs_d>;
+//
+// copysign
+//
+
+def COPYSIGN_F :
+ NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1),
+ "copysign.f32 \t$dst, $src0, $src1;",
+ [(set Float32Regs:$dst, (fcopysign Float32Regs:$src1, Float32Regs:$src0))]>;
+
+def COPYSIGN_D :
+ NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1),
+ "copysign.f64 \t$dst, $src0, $src1;",
+ [(set Float64Regs:$dst, (fcopysign Float64Regs:$src1, Float64Regs:$src0))]>;
+
//
// Abs, Neg bf16, bf16x2
//
diff --git a/llvm/test/CodeGen/NVPTX/copysign.ll b/llvm/test/CodeGen/NVPTX/copysign.ll
new file mode 100644
index 00000000000000..96fb37a129b207
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/copysign.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+define float @fcopysign_f(float %a, float %b) {
+; CHECK-LABEL: fcopysign_f(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_param_0];
+; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_param_1];
+; CHECK-NEXT: copysign.f32 %f3, %f2, %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3;
+; CHECK-NEXT: ret;
+ %val = call float @llvm.copysign.f32(float %a, float %b)
+ ret float %val
+}
+
+define double @fcopysign_d(double %a, double %b) {
+; CHECK-LABEL: fcopysign_d(
+; CHECK: {
+; CHECK-NEXT: .reg .f64 %fd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_param_0];
+; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_param_1];
+; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1;
+; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3;
+; CHECK-NEXT: ret;
+ %val = call double @llvm.copysign.f64(double %a, double %b)
+ ret double %val
+}
+
+declare float @llvm.copysign.f32(float, float)
+declare double @llvm.copysign.f64(double, double)
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index fcc4ec6e4017f7..bdd6c914384601 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -195,9 +195,8 @@ define double @round_double(double %a) {
; check the use of 0.5 to implement round
; CHECK-LABEL: round_double(
; CHECK: {
-; CHECK-NEXT: .reg .pred %p<4>;
-; CHECK-NEXT: .reg .b64 %rd<4>;
-; CHECK-NEXT: .reg .f64 %fd<10>;
+; CHECK-NEXT: .reg .pred %p<3>;
+; CHECK-NEXT: .reg .f64 %fd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [round_double_param_0];
@@ -206,16 +205,10 @@ define double @round_double(double %a) {
; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FE0000000000000;
; CHECK-NEXT: cvt.rzi.f64.f64 %fd4, %fd3;
; CHECK-NEXT: selp.f64 %fd5, 0d0000000000000000, %fd4, %p1;
-; CHECK-NEXT: abs.f64 %fd6, %fd5;
-; CHECK-NEXT: neg.f64 %fd7, %fd6;
-; CHECK-NEXT: mov.b64 %rd1, %fd1;
-; CHECK-NEXT: shr.u64 %rd2, %rd1, 63;
-; CHECK-NEXT: and.b64 %rd3, %rd2, 1;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd3, 1;
-; CHECK-NEXT: selp.f64 %fd8, %fd7, %fd6, %p2;
-; CHECK-NEXT: setp.gt.f64 %p3, %fd2, 0d4330000000000000;
-; CHECK-NEXT: selp.f64 %fd9, %fd1, %fd8, %p3;
-; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd9;
+; CHECK-NEXT: copysign.f64 %fd6, %fd1, %fd5;
+; CHECK-NEXT: setp.gt.f64 %p2, %fd2, 0d4330000000000000;
+; CHECK-NEXT: selp.f64 %fd7, %fd1, %fd6, %p2;
+; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7;
; CHECK-NEXT: ret;
%b = call double @llvm.round.f64(double %a)
ret double %b
More information about the llvm-commits mailing list