[llvm] d8f6338 - AArch64 SVE

Hassnaa Hamdi via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 24 04:09:36 PDT 2022


Author: Hassnaa Hamdi
Date: 2022-08-24T11:09:22Z
New Revision: d8f63382e834756fa1a3de1bafeeeb14921bdde3

URL: https://github.com/llvm/llvm-project/commit/d8f63382e834756fa1a3de1bafeeeb14921bdde3
DIFF: https://github.com/llvm/llvm-project/commit/d8f63382e834756fa1a3de1bafeeeb14921bdde3.diff

LOG: AArch64 SVE
Add SVE patterns to make use of the predicated smin, umin, smax, and umax
instructions, and add the sve-min-max-pred.ll test file to cover the new patterns.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D132122
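
In effect, each new _m1 PatFrag lets instruction selection fold an unpredicated
@llvm.smax/umax/smin/umin followed by a merging select on the governing predicate
into a single predicated SVE instruction. A minimal IR sketch of the shape being
matched (the function name is illustrative; the committed sve-min-max-pred.ll
below covers all four operations across every element width):

define <vscale x 4 x i32> @smax_select_sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; Unpredicated max, then pass %a through on the inactive lanes.
  %m = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %r = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %m, <vscale x 4 x i32> %a
  ; With these patterns, llc -mattr=+sve selects the single merging form:
  ;   smax z0.s, p0/m, z0.s, z1.s
  ret <vscale x 4 x i32> %r
}

declare <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)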

Added: 
    llvm/test/CodeGen/AArch64/sve-min-max-pred.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index cd424ff07d184..08dd920c91378 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -389,6 +389,11 @@ class fma_patfrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
 def AArch64fmla_m1 : fma_patfrags<int_aarch64_sve_fmla, AArch64fadd_p_nsz>;
 def AArch64fmls_m1 : fma_patfrags<int_aarch64_sve_fmls, AArch64fsub_p>;
 
+def AArch64smax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_smax, AArch64smax_p>;
+def AArch64umax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_umax, AArch64umax_p>;
+def AArch64smin_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_smin, AArch64smin_p>;
+def AArch64umin_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_umin, AArch64umin_p>;
+
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
   def  RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">;
@@ -516,10 +521,10 @@ let Predicates = [HasSVEorSME] in {
   def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))),
             (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>;
 
-  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
-  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
-  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
-  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>;
+  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", AArch64smax_m1, DestructiveBinaryComm>;
+  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", AArch64umax_m1, DestructiveBinaryComm>;
+  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", AArch64smin_m1, DestructiveBinaryComm>;
+  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", AArch64umin_m1, DestructiveBinaryComm>;
   defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>;
   defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>;
 

diff --git a/llvm/test/CodeGen/AArch64/sve-min-max-pred.ll b/llvm/test/CodeGen/AArch64/sve-min-max-pred.ll
new file mode 100644
index 0000000000000..9fbbb8e4a433d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-min-max-pred.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve -mtriple=aarch64  %s -o - | FileCheck %s --check-prefixes=CHECK
+
+; These tests just check that the plumbing is in place for @llvm.smax, @llvm.umax,
+; @llvm.smin, @llvm.umin.
+
+; tests for smax:
+
+define <vscale x 16 x i8> @smax_select_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smax_select_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 16 x i8> @llvm.smax.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %out = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %sel, <vscale x 16 x i8> %a
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smax_select_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smax_select_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  %out = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %sel, <vscale x 8 x i16> %a
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smax_select_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smax_select_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %out = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %sel, <vscale x 4 x i32> %a
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smax_select_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smax_select_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 2 x i64> @llvm.smax.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  %out = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %sel, <vscale x 2 x i64> %a
+  ret <vscale x 2 x i64> %out
+}
+
+; tests for umax:
+
+define <vscale x 16 x i8> @umax_select_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umax_select_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 16 x i8> @llvm.umax.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %out = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %sel, <vscale x 16 x i8> %a
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umax_select_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umax_select_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  %out = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %sel, <vscale x 8 x i16> %a
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umax_select_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umax_select_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %out = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %sel, <vscale x 4 x i32> %a
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umax_select_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umax_select_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  %out = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %sel, <vscale x 2 x i64> %a
+  ret <vscale x 2 x i64> %out
+}
+
+; tests for smin:
+
+define <vscale x 16 x i8> @smin_select_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smin_select_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 16 x i8> @llvm.smin.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %out = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %sel, <vscale x 16 x i8> %a
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smin_select_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smin_select_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  %out = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %sel, <vscale x 8 x i16> %a
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smin_select_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smin_select_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %out = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %sel, <vscale x 4 x i32> %a
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smin_select_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smin_select_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 2 x i64> @llvm.smin.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  %out = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %sel, <vscale x 2 x i64> %a
+  ret <vscale x 2 x i64> %out
+}
+
+; tests for umin:
+
+define <vscale x 16 x i8> @umin_select_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umin_select_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  %out = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %sel, <vscale x 16 x i8> %a
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umin_select_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umin_select_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  %out = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %sel, <vscale x 8 x i16> %a
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umin_select_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umin_select_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  %out = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %sel, <vscale x 4 x i32> %a
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umin_select_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umin_select_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %sel = call <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  %out = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %sel, <vscale x 2 x i64> %a
+  ret <vscale x 2 x i64> %out
+}
+
+
+declare <vscale x  16 x i8> @llvm.smax.nxv16i8(<vscale x  16 x i8>, <vscale x  16 x i8>)
+declare <vscale x  8 x i16> @llvm.smax.nxv8i16(<vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.smax.nxv4i32(<vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.smax.nxv2i64(<vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x  16 x i8> @llvm.umax.nxv16i8(<vscale x  16 x i8>, <vscale x  16 x i8>)
+declare <vscale x  8 x i16> @llvm.umax.nxv8i16(<vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.umax.nxv4i32(<vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.umax.nxv2i64(<vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x  16 x i8> @llvm.smin.nxv16i8(<vscale x  16 x i8>, <vscale x  16 x i8>)
+declare <vscale x  8 x i16> @llvm.smin.nxv8i16(<vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.smin.nxv4i32(<vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.smin.nxv2i64(<vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x  16 x i8> @llvm.umin.nxv16i8(<vscale x  16 x i8>, <vscale x  16 x i8>)
+declare <vscale x  8 x i16> @llvm.umin.nxv8i16(<vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.umin.nxv4i32(<vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.umin.nxv2i64(<vscale x  2 x i64>, <vscale x  2 x i64>)
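
Note that EitherVSelectOrPassthruPatFrags wraps both the existing intrinsic and
the new vselect form, so the target-specific sve.smax/umax/smin/umin intrinsics
continue to select the same instructions as before. A hedged sketch of the
intrinsic path (function name illustrative, not part of the commit):

define <vscale x 4 x i32> @smax_intrinsic_sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; The intrinsic arm of the PatFrag still selects: smax z0.s, p0/m, z0.s, z1.s
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %r
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)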
