[llvm] 722a568 - [X86] Add test coverage for f16/bf16 fabs/fneg load-store tests
Simon Pilgrim via llvm-commits
llvm-commits@lists.llvm.org
Thu Dec 5 03:00:33 PST 2024
Author: Simon Pilgrim
Date: 2024-12-05T10:31:55Z
New Revision: 722a5684326207d11bffb85ce422c8831d09c611
URL: https://github.com/llvm/llvm-project/commit/722a5684326207d11bffb85ce422c8831d09c611
DIFF: https://github.com/llvm/llvm-project/commit/722a5684326207d11bffb85ce422c8831d09c611.diff
LOG: [X86] Add test coverage for f16/bf16 fabs/fneg load-store tests
Future extension to #118680
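For context: per the existing "store(fneg(load())) - convert scalar to integer" comments in these files, #118680 lowers a scalar load -> fabs/fneg -> store chain as integer bitmask operations (clearing the sign bit for fabs, flipping it for fneg) instead of going through the FP domain. The tests added here record the current half/bfloat codegen, which still round-trips through conversion libcalls such as __extendhfsf2 and __truncsfbf2, so a future extension of the fold to 16-bit types can be checked against them. A minimal sketch of the IR shape involved (function and value names are illustrative, not taken from the tests):

define void @fneg_rmw_sketch(ptr %p) nounwind {
  ; Load, negate, and store back to the same address. If the fold were
  ; extended to f16, this chain could in principle lower to a single
  ; 16-bit integer xor of the sign bit (0x8000) rather than the libcall
  ; sequences captured in the checks below.
  %v = load half, ptr %p
  %n = fneg half %v
  store half %n, ptr %p
  ret void
}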
Added:
Modified:
llvm/test/CodeGen/X86/combine-fabs.ll
llvm/test/CodeGen/X86/combine-fneg.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/combine-fabs.ll b/llvm/test/CodeGen/X86/combine-fabs.ll
index b9aad9075261b1..0aafc39e7aca75 100644
--- a/llvm/test/CodeGen/X86/combine-fabs.ll
+++ b/llvm/test/CodeGen/X86/combine-fabs.ll
@@ -172,6 +172,70 @@ define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
ret void
}
+define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
+; SSE-LABEL: combine_fabs_int_rmw_bfloat:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq %rdi, %rbx
+; SSE-NEXT: movzwl (%rdi), %eax
+; SSE-NEXT: shll $16, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: callq __truncsfbf2@PLT
+; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_fabs_int_rmw_bfloat:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: movq %rdi, %rbx
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: shll $16, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: callq __truncsfbf2@PLT
+; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: retq
+ %1 = load bfloat, ptr %ptr
+ %2 = call bfloat @llvm.fabs.bf16(bfloat %1)
+ store bfloat %2, ptr %ptr
+ ret void
+}
+
+define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
+; SSE-LABEL: combine_fabs_int_half:
+; SSE: # %bb.0:
+; SSE-NEXT: pushq %rbx
+; SSE-NEXT: movq %rsi, %rbx
+; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: callq __truncsfhf2@PLT
+; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_fabs_int_half:
+; AVX: # %bb.0:
+; AVX-NEXT: pushq %rbx
+; AVX-NEXT: movq %rsi, %rbx
+; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
+; AVX-NEXT: callq __extendhfsf2@PLT
+; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: callq __truncsfhf2@PLT
+; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
+; AVX-NEXT: popq %rbx
+; AVX-NEXT: retq
+ %1 = load half, ptr %src
+ %2 = call half @llvm.fabs.f16(half %1)
+ store half %2, ptr %dst
+ ret void
+}
+
; don't convert vector to scalar
define void @combine_fabs_vec_int_v4f32(ptr %src, ptr %dst) {
; SSE-LABEL: combine_fabs_vec_int_v4f32:
diff --git a/llvm/test/CodeGen/X86/combine-fneg.ll b/llvm/test/CodeGen/X86/combine-fneg.ll
index 855b64229a9c33..32e70b0678e3ef 100644
--- a/llvm/test/CodeGen/X86/combine-fneg.ll
+++ b/llvm/test/CodeGen/X86/combine-fneg.ll
@@ -206,6 +206,147 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
}
; store(fneg(load())) - convert scalar to integer
+define void @fneg_int_rmw_half(ptr %ptr) nounwind {
+; X86-SSE1-LABEL: fneg_int_rmw_half:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %esi
+; X86-SSE1-NEXT: subl $8, %esp
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE1-NEXT: movzwl (%esi), %eax
+; X86-SSE1-NEXT: movl %eax, (%esp)
+; X86-SSE1-NEXT: calll __gnu_h2f_ieee
+; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: calll __gnu_f2h_ieee
+; X86-SSE1-NEXT: movw %ax, (%esi)
+; X86-SSE1-NEXT: addl $8, %esp
+; X86-SSE1-NEXT: popl %esi
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fneg_int_rmw_half:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: subl $8, %esp
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
+; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT: movw %ax, (%esp)
+; X86-SSE2-NEXT: calll __extendhfsf2
+; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll __truncsfhf2
+; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT: movw %ax, (%esi)
+; X86-SSE2-NEXT: addl $8, %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: retl
+;
+; X64-SSE1-LABEL: fneg_int_rmw_half:
+; X64-SSE1: # %bb.0:
+; X64-SSE1-NEXT: pushq %rbx
+; X64-SSE1-NEXT: movq %rdi, %rbx
+; X64-SSE1-NEXT: movzwl (%rdi), %edi
+; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
+; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
+; X64-SSE1-NEXT: movw %ax, (%rbx)
+; X64-SSE1-NEXT: popq %rbx
+; X64-SSE1-NEXT: retq
+;
+; X64-SSE2-LABEL: fneg_int_rmw_half:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pushq %rbx
+; X64-SSE2-NEXT: movq %rdi, %rbx
+; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
+; X64-SSE2-NEXT: callq __extendhfsf2@PLT
+; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: callq __truncsfhf2@PLT
+; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X64-SSE2-NEXT: movw %ax, (%rbx)
+; X64-SSE2-NEXT: popq %rbx
+; X64-SSE2-NEXT: retq
+ %1 = load half, ptr %ptr
+ %2 = fneg half %1
+ store half %2, ptr %ptr
+ ret void
+}
+
+define void @fneg_int_bfloat(ptr %src, ptr %dst) nounwind {
+; X86-SSE1-LABEL: fneg_int_bfloat:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %esi
+; X86-SSE1-NEXT: subl $8, %esp
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movzwl (%eax), %eax
+; X86-SSE1-NEXT: shll $16, %eax
+; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE1-NEXT: movss %xmm0, (%esp)
+; X86-SSE1-NEXT: calll __truncsfbf2
+; X86-SSE1-NEXT: movw %ax, (%esi)
+; X86-SSE1-NEXT: addl $8, %esp
+; X86-SSE1-NEXT: popl %esi
+; X86-SSE1-NEXT: retl
+;
+; X86-SSE2-LABEL: fneg_int_bfloat:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pushl %esi
+; X86-SSE2-NEXT: pushl %eax
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movzwl (%eax), %eax
+; X86-SSE2-NEXT: shll $16, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, (%esp)
+; X86-SSE2-NEXT: calll __truncsfbf2
+; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT: movw %ax, (%esi)
+; X86-SSE2-NEXT: addl $4, %esp
+; X86-SSE2-NEXT: popl %esi
+; X86-SSE2-NEXT: retl
+;
+; X64-SSE1-LABEL: fneg_int_bfloat:
+; X64-SSE1: # %bb.0:
+; X64-SSE1-NEXT: pushq %rbx
+; X64-SSE1-NEXT: subq $16, %rsp
+; X64-SSE1-NEXT: movq %rsi, %rbx
+; X64-SSE1-NEXT: movzwl (%rdi), %eax
+; X64-SSE1-NEXT: shll $16, %eax
+; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
+; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE1-NEXT: callq __truncsfbf2@PLT
+; X64-SSE1-NEXT: movw %ax, (%rbx)
+; X64-SSE1-NEXT: addq $16, %rsp
+; X64-SSE1-NEXT: popq %rbx
+; X64-SSE1-NEXT: retq
+;
+; X64-SSE2-LABEL: fneg_int_bfloat:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pushq %rbx
+; X64-SSE2-NEXT: movq %rsi, %rbx
+; X64-SSE2-NEXT: movzwl (%rdi), %eax
+; X64-SSE2-NEXT: shll $16, %eax
+; X64-SSE2-NEXT: movd %eax, %xmm0
+; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-SSE2-NEXT: callq __truncsfbf2@PLT
+; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
+; X64-SSE2-NEXT: movw %ax, (%rbx)
+; X64-SSE2-NEXT: popq %rbx
+; X64-SSE2-NEXT: retq
+ %1 = load bfloat, ptr %src
+ %2 = fneg bfloat %1
+ store bfloat %2, ptr %dst
+ ret void
+}
+
define void @fneg_int_rmw_f32(ptr %ptr) {
; X86-SSE-LABEL: fneg_int_rmw_f32:
; X86-SSE: # %bb.0: