[llvm] b0f89f0 - [AArch64] Extend testing for folding fadd to predicated SVE instructions. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 12 03:00:43 PDT 2023


Author: David Green
Date: 2023-04-12T11:00:38+01:00
New Revision: b0f89f09f2be84bebf90d469cf072494fe33c6f9

URL: https://github.com/llvm/llvm-project/commit/b0f89f09f2be84bebf90d469cf072494fe33c6f9
DIFF: https://github.com/llvm/llvm-project/commit/b0f89f09f2be84bebf90d469cf072494fe33c6f9.diff

LOG: [AArch64] Extend testing for folding fadd to predicated SVE instructions. NFC

This extends the existing tests to include negative zeros and additional
combinations. See D147724 and D147723.

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/sve-fp-combine.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index 9fc066b87714..db364814e360 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -823,6 +823,7 @@ define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x double> %m1, <vscale x 2 x d
   ret <vscale x 2 x double> %res
 }
 
+
 define <vscale x 8 x half> @fadd_h_sel(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: fadd_h_sel:
 ; CHECK:       // %bb.0:
@@ -883,36 +884,132 @@ define <vscale x 2 x double> @fsub_d_sel(<vscale x 2 x double> %a, <vscale x 2 x
   ret <vscale x 2 x double> %fsub
 }
 
+
+
+define <vscale x 8 x half> @fadd_h_sel_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fadd_h_sel_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %nz = fneg <vscale x 8 x half> zeroinitializer
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %b, <vscale x 8 x half> %nz
+  %fadd = fadd <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fadd
+}
+
+define <vscale x 4 x float> @fadd_s_sel_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fadd_s_sel_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %nz = fneg <vscale x 4 x float> zeroinitializer
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %b, <vscale x 4 x float> %nz
+  %fadd = fadd <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fadd
+}
+
+define <vscale x 2 x double> @fadd_d_sel_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fadd_d_sel_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %nz = fneg <vscale x 2 x double> zeroinitializer
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %b, <vscale x 2 x double> %nz
+  %fadd = fadd <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fadd
+}
+
+define <vscale x 8 x half> @fsub_h_sel_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fsub_h_sel_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %nz = fneg <vscale x 8 x half> zeroinitializer
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %b, <vscale x 8 x half> %nz
+  %fsub = fsub nsz <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fsub
+}
+
+define <vscale x 4 x float> @fsub_s_sel_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fsub_s_sel_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %nz = fneg <vscale x 4 x float> zeroinitializer
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %b, <vscale x 4 x float> %nz
+  %fsub = fsub nsz <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fsub
+}
+
+define <vscale x 2 x double> @fsub_d_sel_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fsub_d_sel_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %nz = fneg <vscale x 2 x double> zeroinitializer
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %b, <vscale x 2 x double> %nz
+  %fsub = fsub nsz <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fsub
+}
+
+
 define <vscale x 8 x half> @fadd_sel_fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: fadd_sel_fmul_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    mov z3.h, #0 // =0x0
+; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z3.h
+; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 8 x half> %b, %c
   %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> zeroinitializer
-  %fadd = fadd nsz contract <vscale x 8 x half> %a, %sel
+  %fadd = fadd contract <vscale x 8 x half> %a, %sel
   ret <vscale x 8 x half> %fadd
 }
 
 define <vscale x 4 x float> @fadd_sel_fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: fadd_sel_fmul_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    mov z3.s, #0 // =0x0
+; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z3.s
+; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 4 x float> %b, %c
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
-  %fadd = fadd nsz contract <vscale x 4 x float> %a, %sel
+  %fadd = fadd contract <vscale x 4 x float> %a, %sel
   ret <vscale x 4 x float> %fadd
 }
 
 define <vscale x 2 x double> @fadd_sel_fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: fadd_sel_fmul_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    mov z3.d, #0 // =0x0
+; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z3.d
+; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 2 x double> %b, %c
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> zeroinitializer
-  %fadd = fadd nsz contract <vscale x 2 x double> %a, %sel
+  %fadd = fadd contract <vscale x 2 x double> %a, %sel
   ret <vscale x 2 x double> %fadd
 }
 
@@ -949,30 +1046,278 @@ define <vscale x 2 x double> @fsub_sel_fmul_d(<vscale x 2 x double> %a, <vscale
   ret <vscale x 2 x double> %fsub
 }
 
-; Verify combine requires contract fast-math flag.
-define <vscale x 4 x float> @fadd_sel_fmul_no_contract_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
-; CHECK-LABEL: fadd_sel_fmul_no_contract_s:
+define <vscale x 8 x half> @fadd_sel_fmul_h_nsz(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_h_nsz:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
-; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 8 x half> %b, %c
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> zeroinitializer
+  %fadd = fadd nsz contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fadd
+}
+
+define <vscale x 4 x float> @fadd_sel_fmul_s_nsz(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_s_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 4 x float> %b, %c
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
-  %fadd = fadd nsz <vscale x 4 x float> %a, %sel
+  %fadd = fadd nsz contract <vscale x 4 x float> %a, %sel
   ret <vscale x 4 x float> %fadd
 }
 
-; Verify combine requires no-signed zeros fast-math flag.
-define <vscale x 4 x float> @fadd_sel_fmul_no_nsz_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
-; CHECK-LABEL: fadd_sel_fmul_no_nsz_s:
+define <vscale x 2 x double> @fadd_sel_fmul_d_nsz(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_d_nsz:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z3.s, #0 // =0x0
+; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 2 x double> %b, %c
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> zeroinitializer
+  %fadd = fadd nsz contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fadd
+}
+
+define <vscale x 8 x half> @fsub_sel_fmul_h_nsz(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_h_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 8 x half> %b, %c
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> zeroinitializer
+  %fsub = fsub nsz contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fsub
+}
+
+define <vscale x 4 x float> @fsub_sel_fmul_s_nsz(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_s_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 4 x float> %b, %c
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
+  %fsub = fsub nsz contract <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fsub
+}
+
+define <vscale x 2 x double> @fsub_sel_fmul_d_nsz(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_d_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 2 x double> %b, %c
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> zeroinitializer
+  %fsub = fsub nsz contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fsub
+}
+
+
+define <vscale x 8 x half> @fadd_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_h_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 8 x half> %b, %c
+  %nz = fneg <vscale x 8 x half> zeroinitializer
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> %nz
+  %fadd = fadd contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fadd
+}
+
+define <vscale x 4 x float> @fadd_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_s_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
 ; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
-; CHECK-NEXT:    sel z1.s, p0, z1.s, z3.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
 ; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %fmul = fmul <vscale x 4 x float> %b, %c
-  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
+  %nz = fneg <vscale x 4 x float> zeroinitializer
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> %nz
   %fadd = fadd contract <vscale x 4 x float> %a, %sel
   ret <vscale x 4 x float> %fadd
 }
+
+define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_d_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 2 x double> %b, %c
+  %nz = fneg <vscale x 2 x double> zeroinitializer
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> %nz
+  %fadd = fadd contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fadd
+}
+
+define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_h_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 8 x half> %b, %c
+  %nz = fneg <vscale x 8 x half> zeroinitializer
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> %nz
+  %fsub = fsub contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fsub
+}
+
+define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_s_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 4 x float> %b, %c
+  %nz = fneg <vscale x 4 x float> zeroinitializer
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> %nz
+  %fsub = fsub contract <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fsub
+}
+
+define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_d_negzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 2 x double> %b, %c
+  %nz = fneg <vscale x 2 x double> zeroinitializer
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> %nz
+  %fsub = fsub contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fsub
+}
+
+
+
+define <vscale x 8 x half> @fadd_sel_fmul_h_negzero_nsz(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_h_negzero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 8 x half> %b, %c
+  %nz = fneg <vscale x 8 x half> zeroinitializer
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> %nz
+  %fadd = fadd nsz contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fadd
+}
+
+define <vscale x 4 x float> @fadd_sel_fmul_s_negzero_nsz(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_s_negzero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 4 x float> %b, %c
+  %nz = fneg <vscale x 4 x float> zeroinitializer
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> %nz
+  %fadd = fadd nsz contract <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fadd
+}
+
+define <vscale x 2 x double> @fadd_sel_fmul_d_negzero_nsz(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_d_negzero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 2 x double> %b, %c
+  %nz = fneg <vscale x 2 x double> zeroinitializer
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> %nz
+  %fadd = fadd nsz contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fadd
+}
+
+define <vscale x 8 x half> @fsub_sel_fmul_h_negzero_nsz(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_h_negzero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 8 x half> %b, %c
+  %nz = fneg <vscale x 8 x half> zeroinitializer
+  %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %fmul, <vscale x 8 x half> %nz
+  %fsub = fsub nsz contract <vscale x 8 x half> %a, %sel
+  ret <vscale x 8 x half> %fsub
+}
+
+define <vscale x 4 x float> @fsub_sel_fmul_s_negzero_nsz(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_s_negzero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
+; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 4 x float> %b, %c
+  %nz = fneg <vscale x 4 x float> zeroinitializer
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> %nz
+  %fsub = fsub nsz contract <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fsub
+}
+
+define <vscale x 2 x double> @fsub_sel_fmul_d_negzero_nsz(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: fsub_sel_fmul_d_negzero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    fmul z1.d, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 2 x double> %b, %c
+  %nz = fneg <vscale x 2 x double> zeroinitializer
+  %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %fmul, <vscale x 2 x double> %nz
+  %fsub = fsub nsz contract <vscale x 2 x double> %a, %sel
+  ret <vscale x 2 x double> %fsub
+}
+
+
+
+; Verify combine requires contract fast-math flag.
+define <vscale x 4 x float> @fadd_sel_fmul_no_contract_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: fadd_sel_fmul_no_contract_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
+; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %fmul = fmul <vscale x 4 x float> %b, %c
+  %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %fmul, <vscale x 4 x float> zeroinitializer
+  %fadd = fadd nsz <vscale x 4 x float> %a, %sel
+  ret <vscale x 4 x float> %fadd
+}


        


More information about the llvm-commits mailing list