[llvm] [X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic (PR #106370)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 22:57:41 PDT 2024
================
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
+; RUN: llc < %s -mtriple=i686-- --mattr=-sse2 | FileCheck %s -check-prefixes=SSE,SSE1
+; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2
+; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1
+; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2
+; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F
+; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW
+
+; Canonicalize a float that is passed directly as an argument and return it.
+; Each run line expects a single multiply: fld1 + fmuls on the i686 run, and
+; mulss / vmulss with a constant-pool operand on the x86_64 SSE2 / AVX runs.
+define float @canon_fp32_varargsf32(float %a) {
+; SSE1-LABEL: canon_fp32_varargsf32:
+; SSE1: # %bb.0:
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmuls {{[0-9]+}}(%esp)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: canon_fp32_varargsf32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp32_varargsf32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %canonicalized = call float @llvm.canonicalize.f32(float %a)
+ ret float %canonicalized
+}
+
+; Canonicalize an x86_fp80 that is passed directly as an argument.
+; All run lines expect the same x87 sequence (fldt, fld1, fmulp); only the
+; stack pointer register differs between the i686 and x86_64 runs.
+; NOTE(review): the function name says "fp32" but the operand type is
+; x86_fp80 - consider renaming when the checks are next regenerated.
+define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) {
+; SSE1-LABEL: canon_fp32_varargsf80:
+; SSE1: # %bb.0:
+; SSE1-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmulp %st, %st(1)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: canon_fp32_varargsf80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fld1
+; SSE2-NEXT: fmulp %st, %st(1)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canon_fp32_varargsf80:
+; AVX: # %bb.0:
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fld1
+; AVX-NEXT: fmulp %st, %st(1)
+; AVX-NEXT: retq
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a)
+ ret x86_fp80 %canonicalized
+}
+
+; Canonicalize a value in the middle of an x86_fp80 arithmetic chain:
+; (a - b), then + b, then - (a - b), canonicalize, and finally - b.
+; Every run line expects the canonicalize to appear as fld1 + fmulp between
+; the surrounding x87 fsub/fadd instructions.
+; NOTE(review): despite the function name and the %mul1 / %mul2 value names,
+; the IR below contains no fmul; the only multiply in the expected output is
+; the one produced for the canonicalize call.
+define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
+; SSE1-LABEL: complex_canonicalize_fmul_x86_fp80:
+; SSE1: # %bb.0: # %entry
+; SSE1-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE1-NEXT: fldt {{[0-9]+}}(%esp)
+; SSE1-NEXT: fsub %st(1), %st
+; SSE1-NEXT: fld %st(0)
+; SSE1-NEXT: fadd %st(2), %st
+; SSE1-NEXT: fsubp %st, %st(1)
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmulp %st, %st(1)
+; SSE1-NEXT: fsubp %st, %st(1)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: complex_canonicalize_fmul_x86_fp80:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fldt {{[0-9]+}}(%rsp)
+; SSE2-NEXT: fsub %st(1), %st
+; SSE2-NEXT: fld %st(0)
+; SSE2-NEXT: fadd %st(2), %st
+; SSE2-NEXT: fsubp %st, %st(1)
+; SSE2-NEXT: fld1
+; SSE2-NEXT: fmulp %st, %st(1)
+; SSE2-NEXT: fsubp %st, %st(1)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: complex_canonicalize_fmul_x86_fp80:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT: fsub %st(1), %st
+; AVX-NEXT: fld %st(0)
+; AVX-NEXT: fadd %st(2), %st
+; AVX-NEXT: fsubp %st, %st(1)
+; AVX-NEXT: fld1
+; AVX-NEXT: fmulp %st, %st(1)
+; AVX-NEXT: fsubp %st, %st(1)
+; AVX-NEXT: retq
+entry:
+
+ %mul1 = fsub x86_fp80 %a, %b
+ %add = fadd x86_fp80 %mul1, %b
+ %mul2 = fsub x86_fp80 %add, %mul1
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %mul2)
+ %result = fsub x86_fp80 %canonicalized, %b
+ ret x86_fp80 %result
+}
+
+; Canonicalize the result of a compare-and-select on doubles: the select
+; yields %b when %a is NaN (fcmp uno) or %a < %b (fcmp olt), otherwise %a.
+; The SSE2 / AVX run lines expect a max instruction combined with an
+; unordered-compare blend, followed by the multiply emitted for canonicalize;
+; the i686 run expects x87 compares and branches followed by fld1 + fmulp.
+; The function and call carry no attribute-group references because this
+; file defines no attribute groups.
+define double @canonicalize_fp64(double %a, double %b) unnamed_addr {
+; SSE1-LABEL: canonicalize_fp64:
+; SSE1: # %bb.0: # %start
+; SSE1-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE1-NEXT: fldl {{[0-9]+}}(%esp)
+; SSE1-NEXT: fucom %st(1)
+; SSE1-NEXT: fnstsw %ax
+; SSE1-NEXT: # kill: def $ah killed $ah killed $ax
+; SSE1-NEXT: sahf
+; SSE1-NEXT: fxch %st(1)
+; SSE1-NEXT: fucom %st(0)
+; SSE1-NEXT: fnstsw %ax
+; SSE1-NEXT: fld %st(1)
+; SSE1-NEXT: ja .LBB3_2
+; SSE1-NEXT: # %bb.1: # %start
+; SSE1-NEXT: fstp %st(0)
+; SSE1-NEXT: fldz
+; SSE1-NEXT: fxch %st(1)
+; SSE1-NEXT: .LBB3_2: # %start
+; SSE1-NEXT: fstp %st(1)
+; SSE1-NEXT: # kill: def $ah killed $ah killed $ax
+; SSE1-NEXT: sahf
+; SSE1-NEXT: jp .LBB3_4
+; SSE1-NEXT: # %bb.3: # %start
+; SSE1-NEXT: fstp %st(1)
+; SSE1-NEXT: fldz
+; SSE1-NEXT: .LBB3_4: # %start
+; SSE1-NEXT: fstp %st(0)
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmulp %st, %st(1)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: canonicalize_fp64:
+; SSE2: # %bb.0: # %start
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: cmpunordsd %xmm0, %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm3
+; SSE2-NEXT: andpd %xmm1, %xmm3
+; SSE2-NEXT: maxsd %xmm0, %xmm1
+; SSE2-NEXT: andnpd %xmm1, %xmm2
+; SSE2-NEXT: orpd %xmm3, %xmm2
+; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: canonicalize_fp64:
+; AVX1: # %bb.0: # %start
+; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: canonicalize_fp64:
+; AVX2: # %bb.0: # %start
+; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canonicalize_fp64:
+; AVX512F: # %bb.0: # %start
+; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canonicalize_fp64:
+; AVX512BW: # %bb.0: # %start
+; AVX512BW-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
+; AVX512BW-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512BW-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX512BW-NEXT: retq
+start:
+
+ %c = fcmp olt double %a, %b
+ %d = fcmp uno double %a, 0.000000e+00
+ %or.cond.i.i = or i1 %d, %c
+ %e = select i1 %or.cond.i.i, double %b, double %a
+ %f = tail call double @llvm.canonicalize.f64(double %e)
+ ret double %f
+}
+
+; Canonicalize the result of a compare-and-select on floats: the select
+; yields %bb when %aa is NaN (fcmp uno) or %aa < %bb (fcmp olt), otherwise
+; %aa. The SSE2 / AVX run lines expect a max instruction combined with an
+; unordered-compare blend, followed by the multiply emitted for canonicalize;
+; the i686 run expects x87 compares and branches followed by fld1 + fmulp.
+; The function and call carry no attribute-group references because this
+; file defines no attribute groups.
+define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr {
+; SSE1-LABEL: canonicalize_fp32:
+; SSE1: # %bb.0: # %start
+; SSE1-NEXT: flds {{[0-9]+}}(%esp)
+; SSE1-NEXT: flds {{[0-9]+}}(%esp)
+; SSE1-NEXT: fucom %st(1)
+; SSE1-NEXT: fnstsw %ax
+; SSE1-NEXT: # kill: def $ah killed $ah killed $ax
+; SSE1-NEXT: sahf
+; SSE1-NEXT: fxch %st(1)
+; SSE1-NEXT: fucom %st(0)
+; SSE1-NEXT: fnstsw %ax
+; SSE1-NEXT: fld %st(1)
+; SSE1-NEXT: ja .LBB4_2
+; SSE1-NEXT: # %bb.1: # %start
+; SSE1-NEXT: fstp %st(0)
+; SSE1-NEXT: fldz
+; SSE1-NEXT: fxch %st(1)
+; SSE1-NEXT: .LBB4_2: # %start
+; SSE1-NEXT: fstp %st(1)
+; SSE1-NEXT: # kill: def $ah killed $ah killed $ax
+; SSE1-NEXT: sahf
+; SSE1-NEXT: jp .LBB4_4
+; SSE1-NEXT: # %bb.3: # %start
+; SSE1-NEXT: fstp %st(1)
+; SSE1-NEXT: fldz
+; SSE1-NEXT: .LBB4_4: # %start
+; SSE1-NEXT: fstp %st(0)
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmulp %st, %st(1)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: canonicalize_fp32:
+; SSE2: # %bb.0: # %start
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: cmpunordss %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm3
+; SSE2-NEXT: andps %xmm1, %xmm3
+; SSE2-NEXT: maxss %xmm0, %xmm1
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm3, %xmm2
+; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: canonicalize_fp32:
+; AVX1: # %bb.0: # %start
+; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: canonicalize_fp32:
+; AVX2: # %bb.0: # %start
+; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: canonicalize_fp32:
+; AVX512F: # %bb.0: # %start
+; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: canonicalize_fp32:
+; AVX512BW: # %bb.0: # %start
+; AVX512BW-NEXT: vmaxss %xmm0, %xmm1, %xmm2
+; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1
+; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
+; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
+; AVX512BW-NEXT: retq
+start:
+
+ %cc = fcmp olt float %aa, %bb
+ %dd = fcmp uno float %aa, 0.000000e+00
+ %or.cond.i.i.x = or i1 %dd, %cc
+ %ee = select i1 %or.cond.i.i.x, float %bb, float %aa
+ %ff = tail call float @llvm.canonicalize.f32(float %ee)
+ ret float %ff
+}
+
+; Load a float through %out, canonicalize it, and store it back to the same
+; address. Each run line expects load-fold + multiply + store: fld1 / fmuls /
+; fstps on the i686 run, and a 1.0 constant multiplied by the memory operand
+; on the x86_64 SSE2 / AVX runs.
+; Uses opaque pointer syntax (ptr addrspace(1)) and no attribute-group
+; reference, since this file defines no attribute groups.
+define void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) {
+; SSE1-LABEL: v_test_canonicalize_var_f32:
+; SSE1: # %bb.0:
+; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmuls (%eax)
+; SSE1-NEXT: fstps (%eax)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: v_test_canonicalize_var_f32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; SSE2-NEXT: mulss (%rdi), %xmm0
+; SSE2-NEXT: movss %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_var_f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovss %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %val = load float, ptr addrspace(1) %out
+ %canonicalized = call float @llvm.canonicalize.f32(float %val)
+ store float %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
+; Load an x86_fp80 through %out, canonicalize it, and store it back to the
+; same address. All run lines expect the x87 sequence fldt / fld1 / fmulp /
+; fstpt; only the address register differs between i686 and x86_64.
+; Uses opaque pointer syntax (ptr addrspace(1)) and no attribute-group
+; reference, since this file defines no attribute groups.
+define void @v_test_canonicalize_x86_fp80(ptr addrspace(1) %out) {
+; SSE1-LABEL: v_test_canonicalize_x86_fp80:
+; SSE1: # %bb.0:
+; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE1-NEXT: fldt (%eax)
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmulp %st, %st(1)
+; SSE1-NEXT: fstpt (%eax)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: v_test_canonicalize_x86_fp80:
+; SSE2: # %bb.0:
+; SSE2-NEXT: fldt (%rdi)
+; SSE2-NEXT: fld1
+; SSE2-NEXT: fmulp %st, %st(1)
+; SSE2-NEXT: fstpt (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_x86_fp80:
+; AVX: # %bb.0:
+; AVX-NEXT: fldt (%rdi)
+; AVX-NEXT: fld1
+; AVX-NEXT: fmulp %st, %st(1)
+; AVX-NEXT: fstpt (%rdi)
+; AVX-NEXT: retq
+ %val = load x86_fp80, ptr addrspace(1) %out
+ %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val)
+ store x86_fp80 %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
+; Load a double through %out, canonicalize it, and store it back to the same
+; address. Each run line expects load-fold + multiply + store: fld1 / fmull /
+; fstpl on the i686 run, and a 1.0 constant multiplied by the memory operand
+; on the x86_64 SSE2 / AVX runs.
+; Uses opaque pointer syntax (ptr addrspace(1)) and no attribute-group
+; reference, since this file defines no attribute groups.
+define void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) {
+; SSE1-LABEL: v_test_canonicalize_var_f64:
+; SSE1: # %bb.0:
+; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE1-NEXT: fld1
+; SSE1-NEXT: fmull (%eax)
+; SSE1-NEXT: fstpl (%eax)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: v_test_canonicalize_var_f64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; SSE2-NEXT: mulsd (%rdi), %xmm0
+; SSE2-NEXT: movsd %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: v_test_canonicalize_var_f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
+; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovsd %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %val = load double, ptr addrspace(1) %out
+ %canonicalized = call double @llvm.canonicalize.f64(double %val)
+ store double %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
+; Canonicalize an undef double and store the result through %out.
+; Every run line expects the call to fold to a constant: the bit pattern
+; 0x7FF8000000000000 is stored directly (as two 32-bit stores on i686, one
+; 64-bit store on x86_64) and no multiply is emitted.
+; Uses opaque pointer syntax (ptr addrspace(1)).
+define void @canonicalize_undef(ptr addrspace(1) %out) {
+; SSE1-LABEL: canonicalize_undef:
+; SSE1: # %bb.0:
+; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE1-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000
+; SSE1-NEXT: movl $0, (%eax)
+; SSE1-NEXT: retl
+;
+; SSE2-LABEL: canonicalize_undef:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; SSE2-NEXT: movq %rax, (%rdi)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: canonicalize_undef:
+; AVX: # %bb.0:
+; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
+; AVX-NEXT: movq %rax, (%rdi)
+; AVX-NEXT: retq
+ %canonicalized = call double @llvm.canonicalize.f64(double undef)
+ store double %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
+declare double @llvm.canonicalize.f64(double)
+declare float @llvm.canonicalize.f32(float)
+declare x86_fp80 @llvm.canonicalize.f80(x86_fp80)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE: {{.*}}
----------------
phoebewang wrote:
ditto.
https://github.com/llvm/llvm-project/pull/106370
More information about the llvm-commits
mailing list