[llvm] 4d387c4 - [X86] Add custom operation actions for f16: FABS, FNEG, and FCOPYSIGN (#128877)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 01:28:53 PST 2025
Author: Prashanth
Date: 2025-02-27T09:28:50Z
New Revision: 4d387c4455b78e3334f12f25adf222e67f0be050
URL: https://github.com/llvm/llvm-project/commit/4d387c4455b78e3334f12f25adf222e67f0be050
DIFF: https://github.com/llvm/llvm-project/commit/4d387c4455b78e3334f12f25adf222e67f0be050.diff
LOG: [X86] Add custom operation actions for f16: FABS, FNEG, and FCOPYSIGN (#128877)
This pull request adds custom handling for the FABS, FNEG, and FCOPYSIGN
floating-point operations on the `f16` type, addressing
https://github.com/llvm/llvm-project/issues/126892.
Fixes #126892
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fp16-libcalls.ll
llvm/test/CodeGen/X86/half.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0dc73bbf8e3f9..393f82e670deb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -701,6 +701,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Custom);
+ setOperationAction(ISD::FNEG, MVT::f16, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll
index 57db9636e5d24..623228985d150 100644
--- a/llvm/test/CodeGen/X86/fp16-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll
@@ -59,12 +59,10 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind {
define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind {
; F16C-LABEL: test_half_copysign:
; F16C: # %bb.0:
-; F16C-NEXT: vpextrw $0, %xmm1, %eax
-; F16C-NEXT: andl $32768, %eax # imm = 0x8000
-; F16C-NEXT: vpextrw $0, %xmm0, %ecx
-; F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; F16C-NEXT: orl %eax, %ecx
-; F16C-NEXT: movw %cx, (%rdi)
+; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; F16C-NEXT: vpor %xmm1, %xmm0, %xmm0
+; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_copysign:
@@ -76,23 +74,23 @@ define void @test_half_copysign(half %a0, half %a1, ptr %p0) nounwind {
;
; X64-LABEL: test_half_copysign:
; X64: # %bb.0:
-; X64-NEXT: pextrw $0, %xmm1, %eax
-; X64-NEXT: andl $32768, %eax # imm = 0x8000
-; X64-NEXT: pextrw $0, %xmm0, %ecx
-; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; X64-NEXT: orl %eax, %ecx
-; X64-NEXT: movw %cx, (%rdi)
+; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: por %xmm1, %xmm0
+; X64-NEXT: pextrw $0, %xmm0, %eax
+; X64-NEXT: movw %ax, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: test_half_copysign:
; X86: # %bb.0:
+; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $32768, %ecx # imm = 0x8000
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: andl $32767, %edx # imm = 0x7FFF
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: movw %dx, (%eax)
+; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-NEXT: por %xmm1, %xmm0
+; X86-NEXT: pextrw $0, %xmm0, %ecx
+; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: retl
%res = call half @llvm.copysign.half(half %a0, half %a1)
store half %res, ptr %p0, align 2
@@ -334,9 +332,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
define void @test_half_fabs(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_fabs:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
-; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
@@ -349,14 +345,9 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
;
; X64-LABEL: test_half_fabs:
; X64: # %bb.0:
-; X64-NEXT: pushq %rbx
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
-; X64-NEXT: movw %ax, (%rbx)
-; X64-NEXT: popq %rbx
+; X64-NEXT: movw %ax, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: test_half_fabs:
@@ -514,9 +505,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
define void @test_half_fneg(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_fneg:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
-; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
@@ -529,14 +518,9 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
;
; X64-LABEL: test_half_fneg:
; X64: # %bb.0:
-; X64-NEXT: pushq %rbx
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: callq __extendhfsf2@PLT
; X64-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
-; X64-NEXT: movw %ax, (%rbx)
-; X64-NEXT: popq %rbx
+; X64-NEXT: movw %ax, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: test_half_fneg:
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 2472e6e19c862..fb836cd2480a7 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -1041,7 +1041,6 @@ define void @main.158() #0 {
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
-; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1
@@ -1059,10 +1058,10 @@ define void @main.158() #0 {
; BWON-F16C-LABEL: main.158:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm1
-; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
-; BWON-F16C-NEXT: vmovss {{.*#+}} xmm2 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; BWON-F16C-NEXT: vucomiss %xmm1, %xmm2
+; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
+; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = [8.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
+; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: jae .LBB20_2
; BWON-F16C-NEXT: # %bb.1: # %entry
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
@@ -1074,8 +1073,7 @@ define void @main.158() #0 {
; CHECK-I686-LABEL: main.158:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
-; CHECK-I686-NEXT: movl $0, (%esp)
-; CHECK-I686-NEXT: calll __truncsfhf2
+; CHECK-I686-NEXT: pxor %xmm0, %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
@@ -1192,32 +1190,25 @@ entry:
define half @fcopysign(half %x, half %y) {
; CHECK-LIBCALL-LABEL: fcopysign:
; CHECK-LIBCALL: # %bb.0:
-; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax
-; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000
-; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx
-; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; CHECK-LIBCALL-NEXT: orl %eax, %ecx
-; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0
+; CHECK-LIBCALL-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-LIBCALL-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-LIBCALL-NEXT: orps %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: fcopysign:
; BWON-F16C: # %bb.0:
-; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
-; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000
-; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx
-; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; BWON-F16C-NEXT: orl %eax, %ecx
-; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
+; BWON-F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; BWON-F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; BWON-F16C-NEXT: vorps %xmm1, %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: fcopysign:
; CHECK-I686: # %bb.0:
-; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000
-; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax
-; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF
-; CHECK-I686-NEXT: orl %eax, %ecx
-; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0
+; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
+; CHECK-I686-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; CHECK-I686-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; CHECK-I686-NEXT: por %xmm1, %xmm0
; CHECK-I686-NEXT: retl
%a = call half @llvm.copysign.f16(half %x, half %y)
ret half %a
More information about the llvm-commits
mailing list