[llvm] [X86][LegalizeDAG] FPOWI: promote f16 operand (PR #105775)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 05:50:12 PDT 2024
https://github.com/v01dXYZ updated https://github.com/llvm/llvm-project/pull/105775
>From 9f0f90fa4ecf8d79b01b45b707be3b36e928dc58 Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Wed, 28 Aug 2024 14:28:11 +0200
Subject: [PATCH 1/2] test pre-commit: Legalize FPOWI (f16) with Promote
Instead of defaulting to Expand.
Warning: The added test case fails with asserts enabled.
---
llvm/test/CodeGen/X86/fp16-libcalls.ll | 91 ++++++++++++++++++++++++++
1 file changed, 91 insertions(+)
diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll
index 933971212f11de..7a6e9a8de41e17 100644
--- a/llvm/test/CodeGen/X86/fp16-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll
@@ -926,6 +926,97 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
ret void
}
+define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
+; F16C-LABEL: test_half_powi:
+; F16C: # %bb.0:
+; F16C-NEXT: pushq %rbx
+; F16C-NEXT: movq %rsi, %rbx
+; F16C-NEXT: vpextrw $0, %xmm0, %eax
+; F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm1
+; F16C-NEXT: vmovd %eax, %xmm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT: callq powf@PLT
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; F16C-NEXT: vmovd %xmm0, %eax
+; F16C-NEXT: movw %ax, (%rbx)
+; F16C-NEXT: popq %rbx
+; F16C-NEXT: retq
+;
+; FP16-LABEL: test_half_powi:
+; FP16: # %bb.0:
+; FP16-NEXT: pushq %rbx
+; FP16-NEXT: movq %rsi, %rbx
+; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
+; FP16-NEXT: vcvtsi2sh %edi, %xmm1, %xmm1
+; FP16-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
+; FP16-NEXT: callq powf@PLT
+; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
+; FP16-NEXT: vmovsh %xmm0, (%rbx)
+; FP16-NEXT: popq %rbx
+; FP16-NEXT: retq
+;
+; X64-LABEL: test_half_powi:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movl %edi, %ebp
+; X64-NEXT: callq __extendhfsf2@PLT
+; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2ss %ebp, %xmm0
+; X64-NEXT: callq __truncsfhf2@PLT
+; X64-NEXT: callq __extendhfsf2@PLT
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: callq powf@PLT
+; X64-NEXT: callq __truncsfhf2@PLT
+; X64-NEXT: pextrw $0, %xmm0, %eax
+; X64-NEXT: movw %ax, (%rbx)
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+;
+; X86-LABEL: test_half_powi:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $56, %esp
+; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorps %xmm0, %xmm0
+; X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT: movss %xmm0, (%esp)
+; X86-NEXT: calll __truncsfhf2
+; X86-NEXT: pextrw $0, %xmm0, %eax
+; X86-NEXT: movw %ax, (%esp)
+; X86-NEXT: calll __extendhfsf2
+; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
+; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: pextrw $0, %xmm0, %eax
+; X86-NEXT: movw %ax, (%esp)
+; X86-NEXT: calll __extendhfsf2
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
+; X86-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-NEXT: calll powf
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: calll __truncsfhf2
+; X86-NEXT: pextrw $0, %xmm0, %eax
+; X86-NEXT: movw %ax, (%esi)
+; X86-NEXT: addl $56, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+ %res = call half @llvm.powi.half(half %a0, i32 %a1)
+ store half %res, ptr %p0, align 2
+ ret void
+}
+
define void @test_half_rint(half %a0, ptr %p0) nounwind {
; F16C-LABEL: test_half_rint:
; F16C: # %bb.0:
>From 56673ee344005cb4cbc337022534be686c7661de Mon Sep 17 00:00:00 2001
From: v01dxyz <v01dxyz at v01d.xyz>
Date: Fri, 23 Aug 2024 05:23:40 +0200
Subject: [PATCH 2/2] [X86][LegalizeDAG] Legalize FPOWI (f16) with Promote
Instead of defaulting to Expand.
Without that, since Expand is not implemented for FPOWI, legalization
falls through to Libcall. As there are no f16 libcalls, the program
aborts when asserts are enabled.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
llvm/test/CodeGen/X86/fp16-libcalls.ll | 42 +++++++------------------
2 files changed, 12 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1a6be4eb5af1ef..f011249d295040 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -614,6 +614,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FTAN, VT, Action);
setOperationAction(ISD::FSQRT, VT, Action);
setOperationAction(ISD::FPOW, VT, Action);
+ setOperationAction(ISD::FPOWI, VT, Action);
setOperationAction(ISD::FLOG, VT, Action);
setOperationAction(ISD::FLOG2, VT, Action);
setOperationAction(ISD::FLOG10, VT, Action);
diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll
index 7a6e9a8de41e17..5c6c3653a27f95 100644
--- a/llvm/test/CodeGen/X86/fp16-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll
@@ -932,12 +932,9 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
; F16C-NEXT: pushq %rbx
; F16C-NEXT: movq %rsi, %rbx
; F16C-NEXT: vpextrw $0, %xmm0, %eax
-; F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm0
-; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; F16C-NEXT: vcvtph2ps %xmm0, %xmm1
; F16C-NEXT: vmovd %eax, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
-; F16C-NEXT: callq powf@PLT
+; F16C-NEXT: callq __powisf2@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
@@ -949,9 +946,7 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
; FP16-NEXT: pushq %rbx
; FP16-NEXT: movq %rsi, %rbx
; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
-; FP16-NEXT: vcvtsi2sh %edi, %xmm1, %xmm1
-; FP16-NEXT: vcvtsh2ss %xmm1, %xmm1, %xmm1
-; FP16-NEXT: callq powf@PLT
+; FP16-NEXT: callq __powisf2@PLT
; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
; FP16-NEXT: vmovsh %xmm0, (%rbx)
; FP16-NEXT: popq %rbx
@@ -965,15 +960,8 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
; X64-NEXT: movq %rsi, %rbx
; X64-NEXT: movl %edi, %ebp
; X64-NEXT: callq __extendhfsf2@PLT
-; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; X64-NEXT: xorps %xmm0, %xmm0
-; X64-NEXT: cvtsi2ss %ebp, %xmm0
-; X64-NEXT: callq __truncsfhf2@PLT
-; X64-NEXT: callq __extendhfsf2@PLT
-; X64-NEXT: movaps %xmm0, %xmm1
-; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
-; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: callq powf@PLT
+; X64-NEXT: movl %ebp, %edi
+; X64-NEXT: callq __powisf2@PLT
; X64-NEXT: callq __truncsfhf2@PLT
; X64-NEXT: pextrw $0, %xmm0, %eax
; X64-NEXT: movw %ax, (%rbx)
@@ -984,33 +972,25 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
;
; X86-LABEL: test_half_powi:
; X86: # %bb.0:
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: subl $56, %esp
+; X86-NEXT: subl $20, %esp
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorps %xmm0, %xmm0
-; X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: movss %xmm0, (%esp)
-; X86-NEXT: calll __truncsfhf2
-; X86-NEXT: pextrw $0, %xmm0, %eax
-; X86-NEXT: movw %ax, (%esp)
-; X86-NEXT: calll __extendhfsf2
-; X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
-; X86-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esp)
; X86-NEXT: calll __extendhfsf2
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: fstps (%esp)
-; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
-; X86-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NEXT: calll powf
+; X86-NEXT: calll __powisf2
; X86-NEXT: fstps (%esp)
; X86-NEXT: calll __truncsfhf2
; X86-NEXT: pextrw $0, %xmm0, %eax
; X86-NEXT: movw %ax, (%esi)
-; X86-NEXT: addl $56, %esp
+; X86-NEXT: addl $20, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl
%res = call half @llvm.powi.half(half %a0, i32 %a1)
store half %res, ptr %p0, align 2
More information about the llvm-commits
mailing list