[llvm] dd7a3d4 - [X86] Extend #118680 - support f16/bf16 fabs/fneg load-store patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 03:00:37 PST 2024
Author: Simon Pilgrim
Date: 2024-12-05T10:31:56Z
New Revision: dd7a3d4d798e30dfe53b5bbbbcd9a23c24ea1af9
URL: https://github.com/llvm/llvm-project/commit/dd7a3d4d798e30dfe53b5bbbbcd9a23c24ea1af9
DIFF: https://github.com/llvm/llvm-project/commit/dd7a3d4d798e30dfe53b5bbbbcd9a23c24ea1af9.diff
LOG: [X86] Extend #118680 - support f16/bf16 fabs/fneg load-store patterns
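This extends the scalar combine from #118680, which rewrites store(fabs/fneg(load x)) as an integer and/xor on the raw bit pattern, to also fire for f16 and bf16, so these cases no longer round-trip through the __extendhfsf2/__truncsfhf2/__truncsfbf2 libcalls (see the test diffs below).

The underlying identity, as a minimal standalone C++ sketch (illustrative only, not LLVM code): in any IEEE-style 16-bit float, half or bfloat16, the sign is bit 15, so fabs is a bitwise AND and fneg a bitwise XOR.

#include <cstdint>
#include <cstdio>

// fabs clears bit 15; fneg flips it. No FP unit or libcall involved.
uint16_t fabs16(uint16_t bits) { return bits & 0x7FFF; }
uint16_t fneg16(uint16_t bits) { return bits ^ 0x8000; }

int main() {
  uint16_t h = 0xBC00; // IEEE half -1.0
  printf("0x%04X 0x%04X\n", fabs16(h), fneg16(h)); // both 0x3C00, i.e. +1.0
}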
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-fabs.ll
llvm/test/CodeGen/X86/combine-fneg.ll
llvm/test/CodeGen/X86/fp16-libcalls.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4ae3e8fbe0f805..c18a4ac9acb1e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52662,7 +52662,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
// Convert scalar fabs/fneg load-store to integer equivalents.
- if ((VT == MVT::f32 || VT == MVT::f64) &&
+ if ((VT == MVT::f16 || VT == MVT::bf16 || VT == MVT::f32 || VT == MVT::f64) &&
(StoredVal.getOpcode() == ISD::FABS ||
StoredVal.getOpcode() == ISD::FNEG) &&
ISD::isNormalLoad(StoredVal.getOperand(0).getNode()) &&
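With the guard relaxed, the unchanged body of the combine handles the new types: in effect the value is loaded as a same-width integer (i16 for f16/bf16), masked, and stored back as an integer. A hedged, runnable C++ model of the rewritten chain (function name is illustrative, not LLVM's):

#include <cstdint>
#include <cstring>
#include <cstdio>

// Model of the combined DAG: the FP op between the load and the store
// becomes a single integer op on the bits.
void store_fneg_f16(const void *src, void *dst) {
  uint16_t bits;
  std::memcpy(&bits, src, sizeof(bits)); // integer load of the f16/bf16 bits
  bits ^= 0x8000;                        // FNEG: flip the sign bit
  std::memcpy(dst, &bits, sizeof(bits)); // integer store
}

int main() {
  uint16_t in = 0x3C00, out = 0; // +1.0 as IEEE half
  store_fneg_f16(&in, &out);
  printf("0x%04X\n", out);       // 0xBC00, i.e. -1.0
}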
diff --git a/llvm/test/CodeGen/X86/combine-fabs.ll b/llvm/test/CodeGen/X86/combine-fabs.ll
index 0aafc39e7aca75..7aa6628cb7f391 100644
--- a/llvm/test/CodeGen/X86/combine-fabs.ll
+++ b/llvm/test/CodeGen/X86/combine-fabs.ll
@@ -175,29 +175,12 @@ define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
; SSE-LABEL: combine_fabs_int_rmw_bfloat:
; SSE: # %bb.0:
-; SSE-NEXT: pushq %rbx
-; SSE-NEXT: movq %rdi, %rbx
-; SSE-NEXT: movzwl (%rdi), %eax
-; SSE-NEXT: shll $16, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: callq __truncsfbf2@PLT
-; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
-; SSE-NEXT: popq %rbx
+; SSE-NEXT: andb $127, 1(%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: combine_fabs_int_rmw_bfloat:
; AVX: # %bb.0:
-; AVX-NEXT: pushq %rbx
-; AVX-NEXT: movq %rdi, %rbx
-; AVX-NEXT: movzwl (%rdi), %eax
-; AVX-NEXT: shll $16, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: callq __truncsfbf2@PLT
-; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
-; AVX-NEXT: popq %rbx
+; AVX-NEXT: andb $127, 1(%rdi)
; AVX-NEXT: retq
%1 = load bfloat, ptr %ptr
%2 = call bfloat @llvm.fabs.bf16(bfloat %1)
@@ -208,27 +191,16 @@ define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
; SSE-LABEL: combine_fabs_int_half:
; SSE: # %bb.0:
-; SSE-NEXT: pushq %rbx
-; SSE-NEXT: movq %rsi, %rbx
-; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
-; SSE-NEXT: callq __extendhfsf2@PLT
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: callq __truncsfhf2@PLT
-; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
-; SSE-NEXT: popq %rbx
+; SSE-NEXT: movzwl (%rdi), %eax
+; SSE-NEXT: andl $32767, %eax # imm = 0x7FFF
+; SSE-NEXT: movw %ax, (%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: combine_fabs_int_half:
; AVX: # %bb.0:
-; AVX-NEXT: pushq %rbx
-; AVX-NEXT: movq %rsi, %rbx
-; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
-; AVX-NEXT: callq __extendhfsf2@PLT
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: callq __truncsfhf2@PLT
-; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
-; AVX-NEXT: popq %rbx
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: andl $32767, %eax # imm = 0x7FFF
+; AVX-NEXT: movw %ax, (%rsi)
; AVX-NEXT: retq
%1 = load half, ptr %src
%2 = call half @llvm.fabs.f16(half %1)
diff --git a/llvm/test/CodeGen/X86/combine-fneg.ll b/llvm/test/CodeGen/X86/combine-fneg.ll
index 32e70b0678e3ef..8ca7fb81563faa 100644
--- a/llvm/test/CodeGen/X86/combine-fneg.ll
+++ b/llvm/test/CodeGen/X86/combine-fneg.ll
@@ -207,68 +207,16 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
; store(fneg(load())) - convert scalar to integer
define void @fneg_int_rmw_half(ptr %ptr) nounwind {
-; X86-SSE1-LABEL: fneg_int_rmw_half:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl %esi
-; X86-SSE1-NEXT: subl $8, %esp
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT: movzwl (%esi), %eax
-; X86-SSE1-NEXT: movl %eax, (%esp)
-; X86-SSE1-NEXT: calll __gnu_h2f_ieee
-; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE1-NEXT: movss %xmm0, (%esp)
-; X86-SSE1-NEXT: calll __gnu_f2h_ieee
-; X86-SSE1-NEXT: movw %ax, (%esi)
-; X86-SSE1-NEXT: addl $8, %esp
-; X86-SSE1-NEXT: popl %esi
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: fneg_int_rmw_half:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: subl $8, %esp
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
-; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT: movw %ax, (%esp)
-; X86-SSE2-NEXT: calll __extendhfsf2
-; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movd %xmm0, (%esp)
-; X86-SSE2-NEXT: calll __truncsfhf2
-; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT: movw %ax, (%esi)
-; X86-SSE2-NEXT: addl $8, %esp
-; X86-SSE2-NEXT: popl %esi
-; X86-SSE2-NEXT: retl
-;
-; X64-SSE1-LABEL: fneg_int_rmw_half:
-; X64-SSE1: # %bb.0:
-; X64-SSE1-NEXT: pushq %rbx
-; X64-SSE1-NEXT: movq %rdi, %rbx
-; X64-SSE1-NEXT: movzwl (%rdi), %edi
-; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
-; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
-; X64-SSE1-NEXT: movw %ax, (%rbx)
-; X64-SSE1-NEXT: popq %rbx
-; X64-SSE1-NEXT: retq
+; X86-SSE-LABEL: fneg_int_rmw_half:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: xorb $-128, 1(%eax)
+; X86-SSE-NEXT: retl
;
-; X64-SSE2-LABEL: fneg_int_rmw_half:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rbx
-; X64-SSE2-NEXT: movq %rdi, %rbx
-; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
-; X64-SSE2-NEXT: callq __extendhfsf2@PLT
-; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: callq __truncsfhf2@PLT
-; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; X64-SSE2-NEXT: movw %ax, (%rbx)
-; X64-SSE2-NEXT: popq %rbx
-; X64-SSE2-NEXT: retq
+; X64-SSE-LABEL: fneg_int_rmw_half:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: xorb $-128, 1(%rdi)
+; X64-SSE-NEXT: retq
%1 = load half, ptr %ptr
%2 = fneg half %1
store half %2, ptr %ptr
@@ -276,71 +224,21 @@ define void @fneg_int_rmw_half(ptr %ptr) nounwind {
}
define void @fneg_int_bfloat(ptr %src, ptr %dst) nounwind {
-; X86-SSE1-LABEL: fneg_int_bfloat:
-; X86-SSE1: # %bb.0:
-; X86-SSE1-NEXT: pushl %esi
-; X86-SSE1-NEXT: subl $8, %esp
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT: movzwl (%eax), %eax
-; X86-SSE1-NEXT: shll $16, %eax
-; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE1-NEXT: movss %xmm0, (%esp)
-; X86-SSE1-NEXT: calll __truncsfbf2
-; X86-SSE1-NEXT: movw %ax, (%esi)
-; X86-SSE1-NEXT: addl $8, %esp
-; X86-SSE1-NEXT: popl %esi
-; X86-SSE1-NEXT: retl
-;
-; X86-SSE2-LABEL: fneg_int_bfloat:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movzwl (%eax), %eax
-; X86-SSE2-NEXT: shll $16, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT: movd %xmm0, (%esp)
-; X86-SSE2-NEXT: calll __truncsfbf2
-; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT: movw %ax, (%esi)
-; X86-SSE2-NEXT: addl $4, %esp
-; X86-SSE2-NEXT: popl %esi
-; X86-SSE2-NEXT: retl
-;
-; X64-SSE1-LABEL: fneg_int_bfloat:
-; X64-SSE1: # %bb.0:
-; X64-SSE1-NEXT: pushq %rbx
-; X64-SSE1-NEXT: subq $16, %rsp
-; X64-SSE1-NEXT: movq %rsi, %rbx
-; X64-SSE1-NEXT: movzwl (%rdi), %eax
-; X64-SSE1-NEXT: shll $16, %eax
-; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
-; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE1-NEXT: callq __truncsfbf2@PLT
-; X64-SSE1-NEXT: movw %ax, (%rbx)
-; X64-SSE1-NEXT: addq $16, %rsp
-; X64-SSE1-NEXT: popq %rbx
-; X64-SSE1-NEXT: retq
+; X86-SSE-LABEL: fneg_int_bfloat:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: movzwl (%ecx), %ecx
+; X86-SSE-NEXT: xorl $32768, %ecx # imm = 0x8000
+; X86-SSE-NEXT: movw %cx, (%eax)
+; X86-SSE-NEXT: retl
;
-; X64-SSE2-LABEL: fneg_int_bfloat:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: pushq %rbx
-; X64-SSE2-NEXT: movq %rsi, %rbx
-; X64-SSE2-NEXT: movzwl (%rdi), %eax
-; X64-SSE2-NEXT: shll $16, %eax
-; X64-SSE2-NEXT: movd %eax, %xmm0
-; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT: callq __truncsfbf2@PLT
-; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
-; X64-SSE2-NEXT: movw %ax, (%rbx)
-; X64-SSE2-NEXT: popq %rbx
-; X64-SSE2-NEXT: retq
+; X64-SSE-LABEL: fneg_int_bfloat:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movzwl (%rdi), %eax
+; X64-SSE-NEXT: xorl $32768, %eax # imm = 0x8000
+; X64-SSE-NEXT: movw %ax, (%rsi)
+; X64-SSE-NEXT: retq
%1 = load bfloat, ptr %src
%2 = fneg bfloat %1
store bfloat %2, ptr %dst
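A note on the RMW outputs above (andb $127, 1(%rdi); xorb $-128, 1(%eax)): on little-endian x86 the sign bit of a 16-bit float is bit 7 of the byte at offset 1, so when the load and store hit the same address the 16-bit and/xor narrows to a single in-memory byte op. Hypothetical C++ equivalents:

#include <cstdint>

// Touch only the high byte, mirroring the byte-wide RMW codegen.
void fabs_rmw16(uint8_t *p) { p[1] &= 0x7F; } // andb $127, 1(%rdi)
void fneg_rmw16(uint8_t *p) { p[1] ^= 0x80; } // xorb $-128, 1(%rdi)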
diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll
index d2b5ef0f086653..1515cd1366bc63 100644
--- a/llvm/test/CodeGen/X86/fp16-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll
@@ -379,22 +379,10 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
;
; X86-LABEL: test_half_fabs:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: pextrw $0, %xmm0, %eax
-; X86-NEXT: movw %ax, (%esp)
-; X86-NEXT: calll __extendhfsf2
-; X86-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movd %xmm0, (%esp)
-; X86-NEXT: calll __truncsfhf2
-; X86-NEXT: pextrw $0, %xmm0, %eax
-; X86-NEXT: movw %ax, (%esi)
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: retl
%res = call half @llvm.fabs.half(half %a0)
store half %res, ptr %p0, align 2
@@ -584,22 +572,10 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
;
; X86-LABEL: test_half_fneg:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $8, %esp
-; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: pextrw $0, %xmm0, %eax
-; X86-NEXT: movw %ax, (%esp)
-; X86-NEXT: calll __extendhfsf2
-; X86-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT: movd %xmm0, (%esp)
-; X86-NEXT: calll __truncsfhf2
-; X86-NEXT: pextrw $0, %xmm0, %eax
-; X86-NEXT: movw %ax, (%esi)
-; X86-NEXT: addl $8, %esp
-; X86-NEXT: popl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $32768, %ecx # imm = 0x8000
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: retl
%res = fneg half %a0
store half %res, ptr %p0, align 2