[llvm] dd7a3d4 - [X86] Extend #118680 - support f16/bf16 fabs/fneg load-store patterns

Simon Pilgrim via llvm-commits <llvm-commits@lists.llvm.org>
Thu Dec 5 03:00:37 PST 2024


Author: Simon Pilgrim
Date: 2024-12-05T10:31:56Z
New Revision: dd7a3d4d798e30dfe53b5bbbbcd9a23c24ea1af9

URL: https://github.com/llvm/llvm-project/commit/dd7a3d4d798e30dfe53b5bbbbcd9a23c24ea1af9
DIFF: https://github.com/llvm/llvm-project/commit/dd7a3d4d798e30dfe53b5bbbbcd9a23c24ea1af9.diff

LOG: [X86] Extend #118680 - support f16/bf16 fabs/fneg load-store patterns
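
For illustration, the kind of pattern this now covers (a reduced sketch
of the modified combine-fabs.ll test below; the function name is
illustrative):

  define void @fabs_f16_example(ptr %src, ptr %dst) nounwind {
    %v = load half, ptr %src
    %a = call half @llvm.fabs.f16(half %v)
    store half %a, ptr %dst
    ret void
  }

Previously this round-tripped through __extendhfsf2/__truncsfhf2
libcalls; it can now be lowered as integer ops on the raw 16 bits.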

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-fabs.ll
    llvm/test/CodeGen/X86/combine-fneg.ll
    llvm/test/CodeGen/X86/fp16-libcalls.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4ae3e8fbe0f805..c18a4ac9acb1e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52662,7 +52662,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
   }
 
   // Convert scalar fabs/fneg load-store to integer equivalents.
-  if ((VT == MVT::f32 || VT == MVT::f64) &&
+  if ((VT == MVT::f16 || VT == MVT::bf16 || VT == MVT::f32 || VT == MVT::f64) &&
       (StoredVal.getOpcode() == ISD::FABS ||
        StoredVal.getOpcode() == ISD::FNEG) &&
       ISD::isNormalLoad(StoredVal.getOperand(0).getNode()) &&

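The transform is sound because fabs and fneg only touch the IEEE sign
bit, so for the 16-bit types the value can be handled as an i16: AND
with 0x7FFF (fabs) or XOR with 0x8000 (fneg) on the loaded bits. A
minimal sketch of the equivalent integer form (illustrative IR, not
part of the patch):

  define void @fabs_f16_as_int(ptr %src, ptr %dst) {
    %bits = load i16, ptr %src
    %abs  = and i16 %bits, 32767   ; 0x7FFF clears the sign bit
    store i16 %abs, ptr %dst
    ret void
  }
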
diff --git a/llvm/test/CodeGen/X86/combine-fabs.ll b/llvm/test/CodeGen/X86/combine-fabs.ll
index 0aafc39e7aca75..7aa6628cb7f391 100644
--- a/llvm/test/CodeGen/X86/combine-fabs.ll
+++ b/llvm/test/CodeGen/X86/combine-fabs.ll
@@ -175,29 +175,12 @@ define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
 define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
 ; SSE-LABEL: combine_fabs_int_rmw_bfloat:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pushq %rbx
-; SSE-NEXT:    movq %rdi, %rbx
-; SSE-NEXT:    movzwl (%rdi), %eax
-; SSE-NEXT:    shll $16, %eax
-; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT:    callq __truncsfbf2@PLT
-; SSE-NEXT:    pextrw $0, %xmm0, (%rbx)
-; SSE-NEXT:    popq %rbx
+; SSE-NEXT:    andb $127, 1(%rdi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_rmw_bfloat:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    pushq %rbx
-; AVX-NEXT:    movq %rdi, %rbx
-; AVX-NEXT:    movzwl (%rdi), %eax
-; AVX-NEXT:    shll $16, %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    callq __truncsfbf2@PLT
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rbx)
-; AVX-NEXT:    popq %rbx
+; AVX-NEXT:    andb $127, 1(%rdi)
 ; AVX-NEXT:    retq
   %1 = load bfloat, ptr %ptr
   %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
@@ -208,27 +191,16 @@ define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
 define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
 ; SSE-LABEL: combine_fabs_int_half:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pushq %rbx
-; SSE-NEXT:    movq %rsi, %rbx
-; SSE-NEXT:    pinsrw $0, (%rdi), %xmm0
-; SSE-NEXT:    callq __extendhfsf2@PLT
-; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT:    callq __truncsfhf2@PLT
-; SSE-NEXT:    pextrw $0, %xmm0, (%rbx)
-; SSE-NEXT:    popq %rbx
+; SSE-NEXT:    movzwl (%rdi), %eax
+; SSE-NEXT:    andl $32767, %eax # imm = 0x7FFF
+; SSE-NEXT:    movw %ax, (%rsi)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_fabs_int_half:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    pushq %rbx
-; AVX-NEXT:    movq %rsi, %rbx
-; AVX-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
-; AVX-NEXT:    callq __extendhfsf2@PLT
-; AVX-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    callq __truncsfhf2@PLT
-; AVX-NEXT:    vpextrw $0, %xmm0, (%rbx)
-; AVX-NEXT:    popq %rbx
+; AVX-NEXT:    movzwl (%rdi), %eax
+; AVX-NEXT:    andl $32767, %eax # imm = 0x7FFF
+; AVX-NEXT:    movw %ax, (%rsi)
 ; AVX-NEXT:    retq
   %1 = load half, ptr %src
   %2 = call half @llvm.fabs.f16(half %1)

diff --git a/llvm/test/CodeGen/X86/combine-fneg.ll b/llvm/test/CodeGen/X86/combine-fneg.ll
index 32e70b0678e3ef..8ca7fb81563faa 100644
--- a/llvm/test/CodeGen/X86/combine-fneg.ll
+++ b/llvm/test/CodeGen/X86/combine-fneg.ll
@@ -207,68 +207,16 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
 
 ; store(fneg(load())) - convert scalar to integer
 define void @fneg_int_rmw_half(ptr %ptr) nounwind {
-; X86-SSE1-LABEL: fneg_int_rmw_half:
-; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    pushl %esi
-; X86-SSE1-NEXT:    subl $8, %esp
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT:    movzwl (%esi), %eax
-; X86-SSE1-NEXT:    movl %eax, (%esp)
-; X86-SSE1-NEXT:    calll __gnu_h2f_ieee
-; X86-SSE1-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE1-NEXT:    movss %xmm0, (%esp)
-; X86-SSE1-NEXT:    calll __gnu_f2h_ieee
-; X86-SSE1-NEXT:    movw %ax, (%esi)
-; X86-SSE1-NEXT:    addl $8, %esp
-; X86-SSE1-NEXT:    popl %esi
-; X86-SSE1-NEXT:    retl
-;
-; X86-SSE2-LABEL: fneg_int_rmw_half:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %esi
-; X86-SSE2-NEXT:    subl $8, %esp
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT:    pinsrw $0, (%esi), %xmm0
-; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT:    movw %ax, (%esp)
-; X86-SSE2-NEXT:    calll __extendhfsf2
-; X86-SSE2-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, (%esp)
-; X86-SSE2-NEXT:    calll __truncsfhf2
-; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT:    movw %ax, (%esi)
-; X86-SSE2-NEXT:    addl $8, %esp
-; X86-SSE2-NEXT:    popl %esi
-; X86-SSE2-NEXT:    retl
-;
-; X64-SSE1-LABEL: fneg_int_rmw_half:
-; X64-SSE1:       # %bb.0:
-; X64-SSE1-NEXT:    pushq %rbx
-; X64-SSE1-NEXT:    movq %rdi, %rbx
-; X64-SSE1-NEXT:    movzwl (%rdi), %edi
-; X64-SSE1-NEXT:    callq __gnu_h2f_ieee@PLT
-; X64-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE1-NEXT:    callq __gnu_f2h_ieee@PLT
-; X64-SSE1-NEXT:    movw %ax, (%rbx)
-; X64-SSE1-NEXT:    popq %rbx
-; X64-SSE1-NEXT:    retq
+; X86-SSE-LABEL: fneg_int_rmw_half:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    xorb $-128, 1(%eax)
+; X86-SSE-NEXT:    retl
 ;
-; X64-SSE2-LABEL: fneg_int_rmw_half:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    pushq %rbx
-; X64-SSE2-NEXT:    movq %rdi, %rbx
-; X64-SSE2-NEXT:    pinsrw $0, (%rdi), %xmm0
-; X64-SSE2-NEXT:    callq __extendhfsf2@PLT
-; X64-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT:    callq __truncsfhf2@PLT
-; X64-SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; X64-SSE2-NEXT:    movw %ax, (%rbx)
-; X64-SSE2-NEXT:    popq %rbx
-; X64-SSE2-NEXT:    retq
+; X64-SSE-LABEL: fneg_int_rmw_half:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    xorb $-128, 1(%rdi)
+; X64-SSE-NEXT:    retq
   %1 = load half, ptr %ptr
   %2 = fneg half %1
   store half %2, ptr %ptr
@@ -276,71 +224,21 @@ define void @fneg_int_rmw_half(ptr %ptr) nounwind {
 }
 
 define void @fneg_int_bfloat(ptr %src, ptr %dst) nounwind {
-; X86-SSE1-LABEL: fneg_int_bfloat:
-; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    pushl %esi
-; X86-SSE1-NEXT:    subl $8, %esp
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movzwl (%eax), %eax
-; X86-SSE1-NEXT:    shll $16, %eax
-; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE1-NEXT:    movss %xmm0, (%esp)
-; X86-SSE1-NEXT:    calll __truncsfbf2
-; X86-SSE1-NEXT:    movw %ax, (%esi)
-; X86-SSE1-NEXT:    addl $8, %esp
-; X86-SSE1-NEXT:    popl %esi
-; X86-SSE1-NEXT:    retl
-;
-; X86-SSE2-LABEL: fneg_int_bfloat:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %esi
-; X86-SSE2-NEXT:    pushl %eax
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movzwl (%eax), %eax
-; X86-SSE2-NEXT:    shll $16, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, (%esp)
-; X86-SSE2-NEXT:    calll __truncsfbf2
-; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; X86-SSE2-NEXT:    movw %ax, (%esi)
-; X86-SSE2-NEXT:    addl $4, %esp
-; X86-SSE2-NEXT:    popl %esi
-; X86-SSE2-NEXT:    retl
-;
-; X64-SSE1-LABEL: fneg_int_bfloat:
-; X64-SSE1:       # %bb.0:
-; X64-SSE1-NEXT:    pushq %rbx
-; X64-SSE1-NEXT:    subq $16, %rsp
-; X64-SSE1-NEXT:    movq %rsi, %rbx
-; X64-SSE1-NEXT:    movzwl (%rdi), %eax
-; X64-SSE1-NEXT:    shll $16, %eax
-; X64-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; X64-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE1-NEXT:    callq __truncsfbf2@PLT
-; X64-SSE1-NEXT:    movw %ax, (%rbx)
-; X64-SSE1-NEXT:    addq $16, %rsp
-; X64-SSE1-NEXT:    popq %rbx
-; X64-SSE1-NEXT:    retq
+; X86-SSE-LABEL: fneg_int_bfloat:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT:    movzwl (%ecx), %ecx
+; X86-SSE-NEXT:    xorl $32768, %ecx # imm = 0x8000
+; X86-SSE-NEXT:    movw %cx, (%eax)
+; X86-SSE-NEXT:    retl
 ;
-; X64-SSE2-LABEL: fneg_int_bfloat:
-; X64-SSE2:       # %bb.0:
-; X64-SSE2-NEXT:    pushq %rbx
-; X64-SSE2-NEXT:    movq %rsi, %rbx
-; X64-SSE2-NEXT:    movzwl (%rdi), %eax
-; X64-SSE2-NEXT:    shll $16, %eax
-; X64-SSE2-NEXT:    movd %eax, %xmm0
-; X64-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-SSE2-NEXT:    callq __truncsfbf2@PLT
-; X64-SSE2-NEXT:    pextrw $0, %xmm0, %eax
-; X64-SSE2-NEXT:    movw %ax, (%rbx)
-; X64-SSE2-NEXT:    popq %rbx
-; X64-SSE2-NEXT:    retq
+; X64-SSE-LABEL: fneg_int_bfloat:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movzwl (%rdi), %eax
+; X64-SSE-NEXT:    xorl $32768, %eax # imm = 0x8000
+; X64-SSE-NEXT:    movw %ax, (%rsi)
+; X64-SSE-NEXT:    retq
   %1 = load bfloat, ptr %src
   %2 = fneg bfloat %1
   store bfloat %2, ptr %dst

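Note how the RMW cases above (value stored back to the same address)
narrow even further: the sign bit is bit 15, i.e. bit 7 of the high
byte on little-endian x86, so the whole update becomes a single byte
op (andb $127, 1(%rdi) / xorb $-128, 1(%rdi)). Conceptually (an
illustrative sketch, not part of the patch):

  define void @fneg_f16_rmw_as_byte(ptr %p) {
    %hi  = getelementptr i8, ptr %p, i64 1  ; high byte of the half
    %b   = load i8, ptr %hi
    %neg = xor i8 %b, -128                  ; flip bit 7 = sign bit
    store i8 %neg, ptr %hi
    ret void
  }
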
diff --git a/llvm/test/CodeGen/X86/fp16-libcalls.ll b/llvm/test/CodeGen/X86/fp16-libcalls.ll
index d2b5ef0f086653..1515cd1366bc63 100644
--- a/llvm/test/CodeGen/X86/fp16-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp16-libcalls.ll
@@ -379,22 +379,10 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
 ;
 ; X86-LABEL: test_half_fabs:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    pextrw $0, %xmm0, %eax
-; X86-NEXT:    movw %ax, (%esp)
-; X86-NEXT:    calll __extendhfsf2
-; X86-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT:    movd %xmm0, (%esp)
-; X86-NEXT:    calll __truncsfhf2
-; X86-NEXT:    pextrw $0, %xmm0, %eax
-; X86-NEXT:    movw %ax, (%esi)
-; X86-NEXT:    addl $8, %esp
-; X86-NEXT:    popl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    andl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT:    movw %cx, (%eax)
 ; X86-NEXT:    retl
   %res = call half @llvm.fabs.half(half %a0)
   store half %res, ptr %p0, align 2
@@ -584,22 +572,10 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
 ;
 ; X86-LABEL: test_half_fneg:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    pextrw $0, %xmm0, %eax
-; X86-NEXT:    movw %ax, (%esp)
-; X86-NEXT:    calll __extendhfsf2
-; X86-NEXT:    fstps {{[0-9]+}}(%esp)
-; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-NEXT:    movd %xmm0, (%esp)
-; X86-NEXT:    calll __truncsfhf2
-; X86-NEXT:    pextrw $0, %xmm0, %eax
-; X86-NEXT:    movw %ax, (%esi)
-; X86-NEXT:    addl $8, %esp
-; X86-NEXT:    popl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl $32768, %ecx # imm = 0x8000
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movw %cx, (%eax)
 ; X86-NEXT:    retl
   %res = fneg half %a0
   store half %res, ptr %p0, align 2

