[llvm] s390x: pattern match saturated truncation (PR #155377)

Tue Aug 26 07:19:06 PDT 2025

https://github.com/folkertdev updated https://github.com/llvm/llvm-project/pull/155377

>From 832f1f9482341ab66c9138f23bf12c6af684e800 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Tue, 26 Aug 2025 11:39:50 +0200
Subject: [PATCH 1/4] s390x: legalize smin/smax/umin/umax for vectors

---
 .../Target/SystemZ/SystemZISelLowering.cpp    |  3 ++
 llvm/lib/Target/SystemZ/SystemZInstrVector.td | 32 +++++++++----------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c73dc3021eb42..43c6e9a6d7514 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -492,6 +492,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
       // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
       // and inverting the result as necessary.
       setOperationAction(ISD::SETCC, VT, Custom);
+
+      setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VT,
+                         Legal);
     }
   }
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 10de8b05cf45f..a5a121ba31711 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -680,19 +680,19 @@ let Predicates = [FeatureVector] in {
   let isCommutable = 1 in {
     // Maximum.
     def VMX  : BinaryVRRcGeneric<"vmx", 0xE7FF>;
-    def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
-    def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
-    def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
-    def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+    def VMXB : BinaryVRRc<"vmxb", 0xE7FF, smax, v128b, v128b, 0>;
+    def VMXH : BinaryVRRc<"vmxh", 0xE7FF, smax, v128h, v128h, 1>;
+    def VMXF : BinaryVRRc<"vmxf", 0xE7FF, smax, v128f, v128f, 2>;
+    def VMXG : BinaryVRRc<"vmxg", 0xE7FF, smax, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
       def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, null_frag, v128q, v128q, 4>;
 
     // Maximum logical.
     def VMXL  : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
-    def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
-    def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
-    def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
-    def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+    def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, umax, v128b, v128b, 0>;
+    def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, umax, v128h, v128h, 1>;
+    def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, umax, v128f, v128f, 2>;
+    def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, umax, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
       def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, null_frag, v128q, v128q, 4>;
   }
@@ -700,19 +700,19 @@ let Predicates = [FeatureVector] in {
   let isCommutable = 1 in {
     // Minimum.
     def VMN  : BinaryVRRcGeneric<"vmn", 0xE7FE>;
-    def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
-    def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
-    def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
-    def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+    def VMNB : BinaryVRRc<"vmnb", 0xE7FE, smin, v128b, v128b, 0>;
+    def VMNH : BinaryVRRc<"vmnh", 0xE7FE, smin, v128h, v128h, 1>;
+    def VMNF : BinaryVRRc<"vmnf", 0xE7FE, smin, v128f, v128f, 2>;
+    def VMNG : BinaryVRRc<"vmng", 0xE7FE, smin, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
       def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, null_frag, v128q, v128q, 4>;
 
     // Minimum logical.
     def VMNL  : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
-    def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
-    def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
-    def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
-    def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+    def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, umin, v128b, v128b, 0>;
+    def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, umin, v128h, v128h, 1>;
+    def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, umin, v128f, v128f, 2>;
+    def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, umin, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
       def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, null_frag, v128q, v128q, 4>;
   }

>From 76bf0f35c41780fe6cb02e933b57934a38b74e0e Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Tue, 26 Aug 2025 15:39:55 +0200
Subject: [PATCH 2/4] s390x: legalize smin/smax/umin/umax for v128q

---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp |  3 +++
 llvm/lib/Target/SystemZ/SystemZInstrVector.td   |  8 ++++----
 llvm/test/CodeGen/SystemZ/int-max-02.ll         | 16 ++++++++--------
 llvm/test/CodeGen/SystemZ/int-min-02.ll         | 16 ++++++++--------
 4 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 43c6e9a6d7514..040909949dc1d 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -287,6 +287,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     // Additional instructions available with z17.
     if (Subtarget.hasVectorEnhancements3()) {
       setOperationAction(ISD::ABS, MVT::i128, Legal);
+
+      setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
+                         MVT::i128, Legal);
     }
   }
 
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index a5a121ba31711..40ad0e842ef5d 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -685,7 +685,7 @@ let Predicates = [FeatureVector] in {
     def VMXF : BinaryVRRc<"vmxf", 0xE7FF, smax, v128f, v128f, 2>;
     def VMXG : BinaryVRRc<"vmxg", 0xE7FF, smax, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
-      def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, null_frag, v128q, v128q, 4>;
+      def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, smax, v128q, v128q, 4>;
 
     // Maximum logical.
     def VMXL  : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
@@ -694,7 +694,7 @@ let Predicates = [FeatureVector] in {
     def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, umax, v128f, v128f, 2>;
     def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, umax, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
-      def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, null_frag, v128q, v128q, 4>;
+      def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, umax, v128q, v128q, 4>;
   }
 
   let isCommutable = 1 in {
@@ -705,7 +705,7 @@ let Predicates = [FeatureVector] in {
     def VMNF : BinaryVRRc<"vmnf", 0xE7FE, smin, v128f, v128f, 2>;
     def VMNG : BinaryVRRc<"vmng", 0xE7FE, smin, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
-      def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, null_frag, v128q, v128q, 4>;
+      def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, smin, v128q, v128q, 4>;
 
     // Minimum logical.
     def VMNL  : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
@@ -714,7 +714,7 @@ let Predicates = [FeatureVector] in {
     def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, umin, v128f, v128f, 2>;
     def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, umin, v128g, v128g, 3>;
     let Predicates = [FeatureVectorEnhancements3] in
-      def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, null_frag, v128q, v128q, 4>;
+      def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, umin, v128q, v128q, 4>;
   }
 
   let isCommutable = 1 in {
diff --git a/llvm/test/CodeGen/SystemZ/int-max-02.ll b/llvm/test/CodeGen/SystemZ/int-max-02.ll
index 5f5188c66065d..00fd01a0ccd63 100644
--- a/llvm/test/CodeGen/SystemZ/int-max-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-max-02.ll
@@ -7,8 +7,8 @@
 define i128 @f1(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r3), 3
-; CHECK-NEXT:    vl %v1, 0(%r4), 3
+; CHECK-NEXT:    vl %v0, 0(%r4), 3
+; CHECK-NEXT:    vl %v1, 0(%r3), 3
 ; CHECK-NEXT:    vmxq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
@@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) {
 define i128 @f4(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r3), 3
-; CHECK-NEXT:    vl %v1, 0(%r4), 3
+; CHECK-NEXT:    vl %v0, 0(%r4), 3
+; CHECK-NEXT:    vl %v1, 0(%r3), 3
 ; CHECK-NEXT:    vmxq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
@@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) {
 define i128 @f5(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r3), 3
-; CHECK-NEXT:    vl %v1, 0(%r4), 3
+; CHECK-NEXT:    vl %v0, 0(%r4), 3
+; CHECK-NEXT:    vl %v1, 0(%r3), 3
 ; CHECK-NEXT:    vmxlq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
@@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) {
 define i128 @f8(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r3), 3
-; CHECK-NEXT:    vl %v1, 0(%r4), 3
+; CHECK-NEXT:    vl %v0, 0(%r4), 3
+; CHECK-NEXT:    vl %v1, 0(%r3), 3
 ; CHECK-NEXT:    vmxlq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
diff --git a/llvm/test/CodeGen/SystemZ/int-min-02.ll b/llvm/test/CodeGen/SystemZ/int-min-02.ll
index 3066af924fb8e..f13db7c4b8995 100644
--- a/llvm/test/CodeGen/SystemZ/int-min-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-min-02.ll
@@ -7,8 +7,8 @@
 define i128 @f1(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r4), 3
-; CHECK-NEXT:    vl %v1, 0(%r3), 3
+; CHECK-NEXT:    vl %v0, 0(%r3), 3
+; CHECK-NEXT:    vl %v1, 0(%r4), 3
 ; CHECK-NEXT:    vmnq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
@@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) {
 define i128 @f4(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r4), 3
-; CHECK-NEXT:    vl %v1, 0(%r3), 3
+; CHECK-NEXT:    vl %v0, 0(%r3), 3
+; CHECK-NEXT:    vl %v1, 0(%r4), 3
 ; CHECK-NEXT:    vmnq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
@@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) {
 define i128 @f5(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r4), 3
-; CHECK-NEXT:    vl %v1, 0(%r3), 3
+; CHECK-NEXT:    vl %v0, 0(%r3), 3
+; CHECK-NEXT:    vl %v1, 0(%r4), 3
 ; CHECK-NEXT:    vmnlq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14
@@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) {
 define i128 @f8(i128 %val1, i128 %val2) {
 ; CHECK-LABEL: f8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vl %v0, 0(%r4), 3
-; CHECK-NEXT:    vl %v1, 0(%r3), 3
+; CHECK-NEXT:    vl %v0, 0(%r3), 3
+; CHECK-NEXT:    vl %v1, 0(%r4), 3
 ; CHECK-NEXT:    vmnlq %v0, %v1, %v0
 ; CHECK-NEXT:    vst %v0, 0(%r2), 3
 ; CHECK-NEXT:    br %r14

>From 656bd57bafc7a9087102c3c007f247df5347993f Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Tue, 26 Aug 2025 11:40:26 +0200
Subject: [PATCH 3/4] s390x: map saturating truncation to a packs/packu

---
 llvm/lib/Target/SystemZ/SystemZInstrVector.td | 40 ++++++++
 llvm/lib/Target/SystemZ/SystemZOperators.td   | 25 +++++
 .../CodeGen/SystemZ/saturating-truncation.ll  | 95 +++++++++++++++++++
 3 files changed, 160 insertions(+)
 create mode 100644 llvm/test/CodeGen/SystemZ/saturating-truncation.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 40ad0e842ef5d..390dd67f239c5 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1299,6 +1299,46 @@ let Predicates = [FeatureVectorEnhancements3] in {
             (VMNLQ VR128:$x, VR128:$y)>;
 }
 
+// Instantiate packs/packu: recognize a saturating truncation and convert
+// into the corresponding packs/packu instruction.
+multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
+                                    Instruction packs> {
+  // Clamp spelled as smin(smax(x, MIN), MAX) on both z_pack operands.
+  def : Pat<(output (z_pack
+              (smin (smax (input VR128:$lhs), ssat_trunc_min_vec),
+                    ssat_trunc_max_vec),
+              (smin (smax (input VR128:$rhs), ssat_trunc_min_vec),
+                    ssat_trunc_max_vec))),
+            (packs VR128:$lhs, VR128:$rhs)>;
+
+  // Clamp spelled as smax(smin(x, MAX), MIN) on both z_pack operands.
+  def : Pat<(output (z_pack
+              (smax (smin (input VR128:$lhs), ssat_trunc_max_vec),
+                    ssat_trunc_min_vec),
+              (smax (smin (input VR128:$rhs), ssat_trunc_max_vec),
+                    ssat_trunc_min_vec))),
+            (packs VR128:$lhs, VR128:$rhs)>;
+}
+
+defm : SignedSaturatingTruncate<v8i16, v16i8, VPKSH>;
+defm : SignedSaturatingTruncate<v4i32, v8i16, VPKSF>;
+defm : SignedSaturatingTruncate<v2i64, v4i32, VPKSG>;
+
+// Unsigned saturating truncation.  A z_pack whose two INPUT-typed operands
+// are each clamped by umin against the half-width unsigned maximum splat is
+// selected as the single instruction PACKU.
+multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
+                                      Instruction packu> {
+  def : Pat<(output (z_pack
+              (umin (input VR128:$lhs), usat_trunc_max_vec),
+              (umin (input VR128:$rhs), usat_trunc_max_vec))),
+            (packu VR128:$lhs, VR128:$rhs)>;
+}
+
+defm : UnsignedSaturatingTruncate<v8i16, v16i8, VPKLSH>;
+defm : UnsignedSaturatingTruncate<v4i32, v8i16, VPKLSF>;
+defm : UnsignedSaturatingTruncate<v2i64, v4i32, VPKLSG>;
+
 // Instantiate comparison patterns to recognize VACC/VSCBI for TYPE.
 multiclass IntegerComputeCarryOrBorrow<ValueType type,
                                        Instruction vacc, Instruction vscbi> {
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 39e216b993b11..547d3dcf92804 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -1067,6 +1067,31 @@ def vsplat_imm_eq_1 : PatFrag<(ops), (build_vector), [{
 }]>;
 def z_vzext1 : PatFrag<(ops node:$x), (and node:$x, vsplat_imm_eq_1)>;
 
+// Splat constants used to recognize saturating truncation: each one matches
+// a build_vector splat of a bound of the integer type half the element width.
+def ssat_trunc_min_vec : PatFrag<(ops), (build_vector), [{
+  // Signed minimum of the half-width type, sign-extended to element width.
+  unsigned EltBits = N->getValueType(0).getVectorElementType().getSizeInBits();
+  APInt Bound = APInt::getSignedMinValue(EltBits / 2).sext(EltBits);
+  APInt Val;
+  return ISD::isConstantSplatVector(N, Val) && APInt::isSameValue(Val, Bound);
+}]>;
+def ssat_trunc_max_vec : PatFrag<(ops), (build_vector), [{
+  // Signed maximum of the half-width type, widened to element width.
+  unsigned EltBits = N->getValueType(0).getVectorElementType().getSizeInBits();
+  APInt Bound = APInt::getSignedMaxValue(EltBits / 2).sext(EltBits);
+  APInt Val;
+  return ISD::isConstantSplatVector(N, Val) && APInt::isSameValue(Val, Bound);
+}]>;
+
+def usat_trunc_max_vec : PatFrag<(ops), (build_vector), [{
+  // Unsigned maximum of the half-width type, zero-extended to element width.
+  unsigned EltBits = N->getValueType(0).getVectorElementType().getSizeInBits();
+  APInt Bound = APInt::getMaxValue(EltBits / 2).zext(EltBits);
+  APInt Val;
+  return ISD::isConstantSplatVector(N, Val) && APInt::isSameValue(Val, Bound);
+}]>;
+
 // Signed "integer greater than zero" on vectors.
 def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
 
diff --git a/llvm/test/CodeGen/SystemZ/saturating-truncation.ll b/llvm/test/CodeGen/SystemZ/saturating-truncation.ll
new file mode 100644
index 0000000000000..0ea29202c1ef5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/saturating-truncation.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s
+
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
+
+; Signed saturation to i8: clamp every i16 lane to [-128, 127], then truncate.
+define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_signed:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpksh %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
+  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
+  %3 = trunc nsw <16 x i16> %2 to <16 x i8>
+  ret <16 x i8> %3
+}
+
+define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpksf %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
+  %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
+  %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @i64_signed(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: i64_signed:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpksg %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %0, <4 x i64> splat (i64 -2147483648))
+  %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> splat (i64 2147483647))
+  %3 = trunc nsw <4 x i64> %2 to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @i64_signed_flipped(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: i64_signed_flipped:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpksg %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> splat (i64 2147483647), <4 x i64> %0)
+  %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> splat (i64 -2147483648), <4 x i64> %1)
+  %3 = trunc nsw <4 x i64> %2 to <4 x i32>
+  ret <4 x i32> %3
+}
+
+define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_unsigned:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpklsh %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
+  %2 = trunc nuw <16 x i16> %1 to <16 x i8>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_unsigned:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpklsf %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
+  %2 = trunc nuw <8 x i32> %1 to <8 x i16>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @i64_unsigned(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: i64_unsigned:
+; CHECK:       # %bb.0: # %bb2
+; CHECK-NEXT:    vpklsg %v24, %v24, %v26
+; CHECK-NEXT:    br %r14
+bb2:
+  %0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %1 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %0, <4 x i64> splat (i64 4294967295))
+  %2 = trunc nuw <4 x i64> %1 to <4 x i32>
+  ret <4 x i32> %2
+}

>From 47175b2f86e6d6877bee8a995d134269d4fc7591 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert at folkertdev.nl>
Date: Tue, 26 Aug 2025 16:02:52 +0200
Subject: [PATCH 4/4] s390x: remove `IntegerMinMaxVectorOps` multiclass

The generic smin/umin/smax/umax nodes are now legal and are matched directly by the VMX*/VMN* instruction definitions, so these patterns are no longer needed.
---
 llvm/lib/Target/SystemZ/SystemZInstrVector.td | 49 -------------------
 1 file changed, 49 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 390dd67f239c5..479bab5ce62b8 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1250,55 +1250,6 @@ defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
 defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
 defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
 
-// Instantiate minimum- and maximum-related patterns for TYPE.  CMPH is the
-// signed or unsigned "set if greater than" comparison instruction and
-// MIN and MAX are the associated minimum and maximum instructions.
-multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
-                                  Instruction min, Instruction max> {
-  let Predicates = [FeatureVector] in {
-    def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
-              (max VR128:$x, VR128:$y)>;
-    def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
-              (min VR128:$x, VR128:$y)>;
-    def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
-                             VR128:$x, VR128:$y)),
-              (min VR128:$x, VR128:$y)>;
-    def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
-                             VR128:$y, VR128:$x)),
-              (max VR128:$x, VR128:$y)>;
-  }
-}
-
-// Signed min/max.
-defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
-defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
-defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
-defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
-
-let Predicates = [FeatureVectorEnhancements3] in {
-  def : Pat<(i128 (or (and VR128:$x, (z_vicmph VR128:$x, VR128:$y)),
-                      (and VR128:$y, (not (z_vicmph VR128:$x, VR128:$y))))),
-            (VMXQ VR128:$x, VR128:$y)>;
-  def : Pat<(i128 (or (and VR128:$y, (z_vicmph VR128:$x, VR128:$y)),
-                      (and VR128:$x, (not (z_vicmph VR128:$x, VR128:$y))))),
-            (VMNQ VR128:$x, VR128:$y)>;
-}
-
-// Unsigned min/max.
-defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
-defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
-defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
-defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
-
-let Predicates = [FeatureVectorEnhancements3] in {
-  def : Pat<(i128 (or (and VR128:$x, (z_vicmphl VR128:$x, VR128:$y)),
-                      (and VR128:$y, (not (z_vicmphl VR128:$x, VR128:$y))))),
-            (VMXLQ VR128:$x, VR128:$y)>;
-  def : Pat<(i128 (or (and VR128:$y, (z_vicmphl VR128:$x, VR128:$y)),
-                      (and VR128:$x, (not (z_vicmphl VR128:$x, VR128:$y))))),
-            (VMNLQ VR128:$x, VR128:$y)>;
-}
-
 // Instantiate packs/packu: recognize a saturating truncation and convert
 // into the corresponding packs/packu instruction.
 multiclass SignedSaturatingTruncate<ValueType input, ValueType output,