[llvm] 45d4698 - [NVPTX] designate fabs and fneg as free (#121513)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 16:54:40 PST 2025
Author: Alex MacLean
Date: 2025-01-07T16:54:37-08:00
New Revision: 45d46983bf7bda53bd7ee8e36a47571b3980fbd7
URL: https://github.com/llvm/llvm-project/commit/45d46983bf7bda53bd7ee8e36a47571b3980fbd7
DIFF: https://github.com/llvm/llvm-project/commit/45d46983bf7bda53bd7ee8e36a47571b3980fbd7.diff
LOG: [NVPTX] designate fabs and fneg as free (#121513)
Added:
llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 4a98fe21b81dc6..c9b7e874556990 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -261,6 +261,9 @@ class NVPTXTargetLowering : public TargetLowering {
return true;
}
+ bool isFAbsFree(EVT VT) const override { return true; }
+ bool isFNegFree(EVT VT) const override { return true; }
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 03cdeb9683abae..8be3a66b7f4836 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -182,8 +182,8 @@ define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.u32 %r1, [test_fneg_param_0];
-; CHECK-NEXT: xor.b32 %r2, %r1, -2147450880;
+; CHECK-NEXT: ld.param.b32 %r1, [test_fneg_param_0];
+; CHECK-NEXT: neg.bf16x2 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = fneg <2 x bfloat> %a
@@ -532,8 +532,8 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.u32 %r1, [test_fabs_param_0];
-; CHECK-NEXT: and.b32 %r2, %r1, 2147450879;
+; CHECK-NEXT: ld.param.b32 %r1, [test_fabs_param_0];
+; CHECK-NEXT: abs.bf16x2 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%r = call <2 x bfloat> @llvm.fabs.f16(<2 x bfloat> %a)
diff --git a/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll b/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll
new file mode 100644
index 00000000000000..9031f33939f2fe
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
+target triple = "nvptx64-nvidia-cuda"
+
+define float @fabs_free(i32 %in) {
+; CHECK-LABEL: fabs_free(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [fabs_free_param_0];
+; CHECK-NEXT: abs.f32 %f2, %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT: ret;
+ %b = bitcast i32 %in to float
+ %f = call float @llvm.fabs.f32(float %b)
+ ret float %f
+}
+
+define float @fneg_free(i32 %in) {
+; CHECK-LABEL: fneg_free(
+; CHECK: {
+; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [fneg_free_param_0];
+; CHECK-NEXT: neg.f32 %f2, %f1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
+; CHECK-NEXT: ret;
+ %b = bitcast i32 %in to float
+ %f = fneg float %b
+ ret float %f
+}
More information about the llvm-commits
mailing list