[llvm] [DAG] Support saturated truncate (PR #99418)

Thu Aug 8 08:46:32 PDT 2024

https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/99418

>From e3baff292e25990a1db3b3b8e41b9952674c7441 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 16 Jul 2024 13:52:29 +0900
Subject: [PATCH 01/31] [DAG] Support saturated truncate

`truncate` is `saturated` if no additional conversion is required
between the target and return values. if the target is `saturated`
when attempting to crop from a `vector`, there is an opportunity
to optimize it.

previously, each architecture had an attemping optimization, so there
was redundant code.

this patch implements common logic by adding `ISD::TRUNCATE_[US]SAT`
to indicate saturated truncate.

Fixes #85903
---
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   7 +
 .../include/llvm/Target/TargetSelectionDAG.td |   3 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 165 +++++++++++++++++-
 .../SelectionDAG/SelectionDAGDumper.cpp       |   3 +
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |   5 +
 5 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 5b657fb1712968..f67c6ee3d55dc2 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -814,6 +814,13 @@ enum NodeType {
 
   /// TRUNCATE - Completely drop the high bits.
   TRUNCATE,
+  /// TRUNCATE_[SU]SAT - Truncate for saturated operand
+  TRUNCATE_SSAT_S, // saturate signed input to signed result -
+                   // truncate(smin(smax(x)))
+  TRUNCATE_SSAT_U, // saturate signed input to unsigned result -
+                   // truncate(smin(smax(x,0)))
+  TRUNCATE_USAT_U, // saturate unsigned input to unsigned result -
+                   // truncate(umin(x))
 
   /// [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
   /// depends on the first letter) to floating point.
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 46044aab79a832..92d10a94bd81e5 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -477,6 +477,9 @@ def sext       : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
 def zext       : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
 def anyext     : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
 def trunc      : SDNode<"ISD::TRUNCATE"   , SDTIntTruncOp>;
+def truncssat_s : SDNode<"ISD::TRUNCATE_SSAT_S", SDTIntTruncOp>;
+def truncssat_u : SDNode<"ISD::TRUNCATE_SSAT_U", SDTIntTruncOp>;
+def truncusat_u : SDNode<"ISD::TRUNCATE_USAT_U", SDTIntTruncOp>;
 def bitconvert : SDNode<"ISD::BITCAST"    , SDTUnaryOp>;
 def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;
 def freeze     : SDNode<"ISD::FREEZE"     , SDTFreeze>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 060e66175d965c..8840aa7be2a5d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -486,6 +486,7 @@ namespace {
     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
     SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
     SDValue visitTRUNCATE(SDNode *N);
+    SDValue visitTRUNCATE_USAT(SDNode *N);
     SDValue visitBITCAST(SDNode *N);
     SDValue visitFREEZE(SDNode *N);
     SDValue visitBUILD_PAIR(SDNode *N);
@@ -1908,6 +1909,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::ZERO_EXTEND_VECTOR_INREG:
   case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
   case ISD::TRUNCATE:           return visitTRUNCATE(N);
+  case ISD::TRUNCATE_USAT_U:
+  case ISD::TRUNCATE_SSAT_U:    return visitTRUNCATE_USAT(N);
   case ISD::BITCAST:            return visitBITCAST(N);
   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   case ISD::FADD:               return visitFADD(N);
@@ -13203,7 +13206,9 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
   unsigned CastOpcode = Cast->getOpcode();
   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
-          CastOpcode == ISD::FP_ROUND) &&
+          CastOpcode == ISD::TRUNCATE_SSAT_S ||
+          CastOpcode == ISD::TRUNCATE_SSAT_U ||
+          CastOpcode == ISD::TRUNCATE_USAT_U || CastOpcode == ISD::FP_ROUND) &&
          "Unexpected opcode for vector select narrowing/widening");
 
   // We only do this transform before legal ops because the pattern may be
@@ -14915,6 +14920,159 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue FPInstr = N0.getOpcode() == ISD::SMAX ? N0.getOperand(0) : N0;
+  if (FPInstr.getOpcode() == ISD::FP_TO_SINT ||
+      FPInstr.getOpcode() == ISD::FP_TO_UINT) {
+    EVT FPVT = FPInstr.getOperand(0).getValueType();
+    if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
+                                                          FPVT, VT))
+      return SDValue();
+    SDValue Sat = DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(FPInstr), VT,
+                              FPInstr.getOperand(0),
+                              DAG.getValueType(VT.getScalarType()));
+    return Sat;
+  }
+
+  return SDValue();
+}
+
+/// Detect patterns of truncation with unsigned saturation:
+///
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value x to be truncated or SDValue() if the pattern was
+/// not matched.
+///
+static SDValue detectUSatUPattern(SDValue In, EVT VT) {
+  EVT InVT = In.getValueType();
+
+  // Saturation with truncation. We truncate from InVT to VT.
+  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
+         "Unexpected types for truncate operation");
+
+  // Match min/max and return limit value as a parameter.
+  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
+    if (V.getOpcode() == Opcode &&
+        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
+      return V.getOperand(0);
+    return SDValue();
+  };
+
+  APInt C1, C2;
+  if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
+    // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
+    // the element size of the destination type.
+    if (C2.isMask(VT.getScalarSizeInBits()))
+      return UMin;
+
+  return SDValue();
+}
+
+/// Detect patterns of truncation with signed saturation:
+/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
+///                  signed_max_of_dest_type)) to dest_type)
+/// or:
+/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
+///                  signed_min_of_dest_type)) to dest_type).
+/// With MatchPackUS, the smax/smin range is [0, unsigned_max_of_dest_type].
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectSSatSPattern(SDValue In, EVT VT) {
+  unsigned NumDstBits = VT.getScalarSizeInBits();
+  unsigned NumSrcBits = In.getScalarValueSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  auto MatchMinMax = [](SDValue V, unsigned Opcode,
+                        const APInt &Limit) -> SDValue {
+    APInt C;
+    if (V.getOpcode() == Opcode &&
+        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
+      return V.getOperand(0);
+    return SDValue();
+  };
+
+  APInt SignedMax, SignedMin;
+  SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+  SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
+  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax)) {
+    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin)) {
+      return SMax;
+    }
+  }
+  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin)) {
+    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax)) {
+      return SMin;
+    }
+  }
+  return SDValue();
+}
+
+/// Detect patterns of truncation with unsigned saturation:
+///
+/// (truncate (smin (smax (x, C1), C2)) to dest_type),
+/// where C1 >= 0 and C2 is unsigned max of destination type.
+///
+/// (truncate (smax (smin (x, C2), C1)) to dest_type)
+/// where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
+///
+static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
+                                  const SDLoc &DL) {
+  EVT InVT = In.getValueType();
+
+  // Saturation with truncation. We truncate from InVT to VT.
+  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
+         "Unexpected types for truncate operation");
+
+  // Match min/max and return limit value as a parameter.
+  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
+    if (V.getOpcode() == Opcode &&
+        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
+      return V.getOperand(0);
+    return SDValue();
+  };
+
+  APInt C1, C2;
+  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
+    if (MatchMinMax(SMin, ISD::SMAX, C1))
+      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
+        return SMin;
+
+  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
+    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
+      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
+          C2.uge(C1))
+        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
+
+  return SDValue();
+}
+
+static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
+                               SDLoc &DL, const TargetLowering &TLI,
+                               SelectionDAG &DAG) {
+  if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
+    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_S, SrcVT) &&
+        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_S, VT)) {
+      if (SDValue SSatVal = detectSSatSPattern(Src, VT))
+        return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
+    } else if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_U, SrcVT) &&
+               TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_U, VT)) {
+      if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
+        return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
+    }
+  } else if (Src.getOpcode() == ISD::UMIN) {
+    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_USAT_U, SrcVT) &&
+        TLI.isTypeDesirableForOp(ISD::TRUNCATE_USAT_U, VT)) {
+      if (SDValue USatVal = detectUSatUPattern(Src, VT)) {
+        return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
@@ -14930,6 +15088,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   if (N0.getOpcode() == ISD::TRUNCATE)
     return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
 
+  // fold satruated truncate
+  if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG)) {
+    return SaturatedTR;
+  }
+
   // fold (truncate c1) -> c1
   if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
     return C;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 16fc52caebb757..46e8e54ee4ed7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -380,6 +380,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::SIGN_EXTEND_VECTOR_INREG:   return "sign_extend_vector_inreg";
   case ISD::ZERO_EXTEND_VECTOR_INREG:   return "zero_extend_vector_inreg";
   case ISD::TRUNCATE:                   return "truncate";
+  case ISD::TRUNCATE_SSAT_S:            return "truncate_ssat_s";
+  case ISD::TRUNCATE_SSAT_U:            return "truncate_ssat_u";
+  case ISD::TRUNCATE_USAT_U:            return "truncate_usat_u";
   case ISD::FP_ROUND:                   return "fp_round";
   case ISD::STRICT_FP_ROUND:            return "strict_fp_round";
   case ISD::FP_EXTEND:                  return "fp_extend";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6ca9955993d242..149b5dabee0565 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -753,6 +753,11 @@ void TargetLoweringBase::initActions() {
     // Absolute difference
     setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
 
+    // Saturated trunc
+    setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
+    setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
+    setOperationAction(ISD::TRUNCATE_USAT_U, VT, Expand);
+
     // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
     setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
                        Expand);

>From e53f9bde236d0a87b7a00799d6a654ae7cbb9914 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 16 Jul 2024 14:05:55 +0900
Subject: [PATCH 02/31] [AArch64] Support saturated truncate

Add support for `ISD::TRUNCATE_[US]SAT`.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 18 ++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  5 +++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 44 +++++++++----------
 llvm/test/CodeGen/AArch64/qmovn.ll            | 12 ++---
 4 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d86e52d49000ae..c42dc9d4fc3b25 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1410,6 +1410,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }
 
+  for (MVT VT : {MVT::v8i16, MVT::v4i32}) {
+    setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Custom);
+    setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Custom);
+    setOperationAction(ISD::TRUNCATE_USAT_U, VT, Custom);
+  }
+
   if (Subtarget->hasSME()) {
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   }
@@ -28730,6 +28736,18 @@ bool AArch64TargetLowering::hasInlineStackProbe(
          MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
 }
 
+bool AArch64TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+  switch (Opc) {
+  case ISD::TRUNCATE_SSAT_S:
+  case ISD::TRUNCATE_SSAT_U:
+  case ISD::TRUNCATE_USAT_U:
+    if (VT == MVT::v8i8 || VT == MVT::v4i16)
+      return true;
+  }
+
+  return TargetLowering::isTypeDesirableForOp(Opc, VT);
+}
+
 #ifndef NDEBUG
 void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
   switch (N->getOpcode()) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 81e15185f985d5..50e26612ac863e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -743,6 +743,11 @@ class AArch64TargetLowering : public TargetLowering {
   bool generateFMAsInMachineCombiner(EVT VT,
                                      CodeGenOptLevel OptLevel) const override;
 
+  /// Return true if the target has native support for
+  /// the specified value type and it is 'desirable' to use the type for the
+  /// given node type.
+  bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
+
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1053ba9242768a..ac42f9cb6eb63f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5418,64 +5418,60 @@ def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
 def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
 
 // trunc(umin(X, 255)) -> UQXTRN v8i8
-def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
+def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
           (UQXTNv8i8 V128:$Vn)>;
 // trunc(umin(X, 65535)) -> UQXTRN v4i16
-def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
+def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
           (UQXTNv4i16 V128:$Vn)>;
 // trunc(smin(smax(X, -128), 128)) -> SQXTRN
 //  with reversed min/max
-def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
-                             (v8i16 VImm7F)))),
-          (SQXTNv8i8 V128:$Vn)>;
-def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
-                             (v8i16 VImm80)))),
+def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
           (SQXTNv8i8 V128:$Vn)>;
+// trunc(umin(smax(X, 0), 255)) -> SQXTUN
+def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
+          (SQXTUNv8i8 V128:$Vn)>;
 // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
 //  with reversed min/max
-def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
-                              (v4i32 VImm7FFF)))),
-          (SQXTNv4i16 V128:$Vn)>;
-def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
-                              (v4i32 VImm8000)))),
+def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
           (SQXTNv4i16 V128:$Vn)>;
+// trunc(umin(smax(X, 0), 65535)) -> SQXTUN
+def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
+          (SQXTUNv4i16 V128:$Vn)>;
 
 // concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
-                 (v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
+                 (v8i8 (truncusat_u (v8i16 V128:$Vn))))),
           (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 // concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
-                 (v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
+                 (v4i16 (truncusat_u (v4i32 V128:$Vn))))),
           (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
 // concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
 // with reversed min/max
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
-                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
-                                          (v8i16 VImm7F)))))),
+                 (v8i8 (truncssat_s (v8i16 V128:$Vn))))),
           (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(smin(smax Vm, 0), 127) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
-                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
-                                          (v8i16 VImm80)))))),
-          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+                 (v8i8 (truncssat_u (v8i16 V128:$Vn))))),
+          (SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
 // concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
 // with reversed min/max
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
-                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
-                                           (v4i32 VImm7FFF)))))),
+                 (v4i16 (truncssat_s (v4i32 V128:$Vn))))),
           (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(smin(smax Vm, 0), 32767) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
-                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
-                                           (v4i32 VImm8000)))))),
-          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+                 (v4i16 (truncssat_u (v4i32 V128:$Vn))))),
+          (SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
 // Select BSWAP vector instructions into REV instructions
 def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))), 
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 35c172adbad3d8..0b19a9ff7e3dd9 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -292,15 +292,15 @@ entry:
 
 ; Test the (concat_vectors (X), (trunc(umin(smax(Y, 0), 2^n))))) pattern.
 
+; TODO: %min is a value between 0 and 255 and is within the unsigned range of i8.
+; So it is saturated truncate. we have an optimization opportunity.
 define <16 x i8> @us_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: us_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    movi v3.2d, #0xff00ff00ff00ff
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smax v1.8h, v1.8h, v2.8h
-; CHECK-NEXT:    smin v1.8h, v1.8h, v3.8h
-; CHECK-NEXT:    xtn2 v0.16b, v1.8h
+; CHECK-NEXT:    uqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
   %max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %y, <8 x i16> zeroinitializer)
@@ -310,15 +310,15 @@ entry:
   ret <16 x i8> %shuffle
 }
 
+; TODO: %min is a value between 0 and 65535 and is within the unsigned range of i16.
+; So it is saturated. we have an optimization opportunity.
 define <8 x i16> @us_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: us_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
-; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
-; CHECK-NEXT:    xtn2 v0.8h, v1.4s
+; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
   %max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> zeroinitializer)

>From 5523f7ae23ab40249ab59d467afe8d9cc025b493 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 16 Jul 2024 14:14:40 +0900
Subject: [PATCH 03/31] [RISCV] Support saturated truncate

Add support for `ISD::TRUNCATE_[US]SAT`.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 25 ++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d40d4997d76149..704caeab90bb6e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -853,7 +853,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
       // nodes which truncate by one power of two at a time.
-      setOperationAction(ISD::TRUNCATE, VT, Custom);
+      setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
+                          ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
+                         VT, Custom);
 
       // Custom-lower insert/extract operations to simplify patterns.
       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -1168,7 +1170,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
         setOperationAction(ISD::SELECT, VT, Custom);
 
-        setOperationAction(ISD::TRUNCATE, VT, Custom);
+        setOperationAction({ISD::TRUNCATE, ISD::TRUNCATE_SSAT_S,
+                            ISD::TRUNCATE_SSAT_U, ISD::TRUNCATE_USAT_U},
+                           VT, Custom);
 
         setOperationAction(ISD::BITCAST, VT, Custom);
 
@@ -6395,6 +6399,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
   }
   case ISD::TRUNCATE:
+  case ISD::TRUNCATE_SSAT_S:
+  case ISD::TRUNCATE_SSAT_U:
+  case ISD::TRUNCATE_USAT_U:
     // Only custom-lower vector truncates
     if (!Op.getSimpleValueType().isVector())
       return Op;
@@ -8234,7 +8241,8 @@ SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
 
 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                   SelectionDAG &DAG) const {
-  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
+  unsigned Opc = Op.getOpcode();
+  bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
   SDLoc DL(Op);
 
   MVT VT = Op.getSimpleValueType();
@@ -8279,11 +8287,18 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
   }
 
+  unsigned NewOpc;
+  if (Opc == ISD::TRUNCATE_SSAT_S)
+    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
+  else if (Opc == ISD::TRUNCATE_SSAT_U || Opc == ISD::TRUNCATE_USAT_U)
+    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
+  else
+    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
+
   do {
     SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
     MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
-    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
-                         Mask, VL);
+    Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
   } while (SrcEltVT != DstEltVT);
 
   if (SrcVT.isFixedLengthVector())

>From 60b086186ebafd697cd62af7d8c8b71ed05ce787 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sat, 27 Jul 2024 23:07:53 +0900
Subject: [PATCH 04/31] fix: remove unnecessary code

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8840aa7be2a5d7..8dbfdedcfc85c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14930,10 +14930,9 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
     if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
                                                           FPVT, VT))
       return SDValue();
-    SDValue Sat = DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(FPInstr), VT,
-                              FPInstr.getOperand(0),
-                              DAG.getValueType(VT.getScalarType()));
-    return Sat;
+    return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(FPInstr), VT,
+                       FPInstr.getOperand(0),
+                       DAG.getValueType(VT.getScalarType()));
   }
 
   return SDValue();
@@ -15089,9 +15088,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
 
   // fold satruated truncate
-  if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG)) {
+  if (SDValue SaturatedTR = foldToSaturated(N, VT, N0, SrcVT, DL, TLI, DAG))
     return SaturatedTR;
-  }
 
   // fold (truncate c1) -> c1
   if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))

>From 0b7c690ab9a90cfbf488937b1142dc108c8f8700 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 01:36:57 +0900
Subject: [PATCH 05/31] fix: truncusat_u(smax(fp_to_[us]int, x))) to
 fp_to_uint_sat

We can transform `truncusat_u(smax(fp_to_[us]int, x)))` or
`truncusat_u(fp_to_[us]int, x))` to `FP_TO_UINT_SAT`.

Because smax is commutative, make sure that the operands
of smax are each 'FP_TO_[US]INT`.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8dbfdedcfc85c8..806ec8ac547ab6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14923,9 +14923,23 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
 SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
-  SDValue FPInstr = N0.getOpcode() == ISD::SMAX ? N0.getOperand(0) : N0;
-  if (FPInstr.getOpcode() == ISD::FP_TO_SINT ||
-      FPInstr.getOpcode() == ISD::FP_TO_UINT) {
+
+  auto MatchFPTOINT = [&](SDValue Val) -> SDValue {
+    if (Val.getOpcode() == ISD::FP_TO_SINT ||
+        Val.getOpcode() == ISD::FP_TO_UINT)
+      return Val;
+    return SDValue();
+  };
+
+  SDValue FPInstr;
+  if (N0.getOpcode() == ISD::SMAX) {
+    FPInstr = MatchFPTOINT(N0.getOperand(0));
+    if (!FPInstr)
+      FPInstr = MatchFPTOINT(N0.getOperand(1));
+  } else
+    FPInstr = MatchFPTOINT(N0);
+
+  if (FPInstr) {
     EVT FPVT = FPInstr.getOperand(0).getValueType();
     if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
                                                           FPVT, VT))

>From 750f061ca4a760c1f08ac859b74be4f501bcf0e8 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 02:24:34 +0900
Subject: [PATCH 06/31] add comments for added opcode

Also added operand for instruction `min/max`  for `truncate`
which was commented out.
---
 llvm/include/llvm/CodeGen/ISDOpcodes.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index f67c6ee3d55dc2..7305e3086fcd65 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -814,13 +814,26 @@ enum NodeType {
 
   /// TRUNCATE - Completely drop the high bits.
   TRUNCATE,
-  /// TRUNCATE_[SU]SAT - Truncate for saturated operand
+  /// TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand
+  /// [SU] located in middle, prefix for `SAT` means indicates whether
+  /// existing truncate target was a signed operation. For examples,
+  /// If `truncate(smin(smax(x, C), C))` was saturated then become `S`.
+  /// If `truncate(umin(x, C))` was saturated then become `U`.
+  /// [SU] located in last indicates whether range of truncated values is
+  /// sign-saturated. For example, if `truncate(smin(smax(x, C), C))` is a
+  /// truncation to `i8`, then if value of C ranges from `-128 to 127`, it will
+  /// be saturated against signed values, resulting in `S`, which will combine
+  /// to `TRUNCATE_SSAT_S`. If the value of C ranges from `0 to 255`, it will
+  /// be saturated against unsigned values, resulting in `U`, which will
+  /// combine to `TRUNATE_SSAT_U`. Similarly, in `truncate(umin(x, C))`, if
+  /// value of C ranges from `0 to 255`, it becomes `U` because it is saturated
+  /// for unsigned values. As a result, it combines to `TRUNCATE_USAT_U`.
   TRUNCATE_SSAT_S, // saturate signed input to signed result -
-                   // truncate(smin(smax(x)))
+                   // truncate(smin(smax(x, C), C))
   TRUNCATE_SSAT_U, // saturate signed input to unsigned result -
-                   // truncate(smin(smax(x,0)))
+                   // truncate(smin(smax(x, 0), C))
   TRUNCATE_USAT_U, // saturate unsigned input to unsigned result -
-                   // truncate(umin(x))
+                   // truncate(umin(x, C))
 
   /// [SU]INT_TO_FP - These operators convert integers (whose interpreted sign
   /// depends on the first letter) to floating point.

>From a5666a72e6f1e086cfcefda806922056a8cee234 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 02:37:07 +0900
Subject: [PATCH 07/31] aarch64: change action for TRUNCATE_[US]SAT from Custom
 to Legal

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c42dc9d4fc3b25..309b5945a6f4a2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1411,9 +1411,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   }
 
   for (MVT VT : {MVT::v8i16, MVT::v4i32}) {
-    setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Custom);
-    setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Custom);
-    setOperationAction(ISD::TRUNCATE_USAT_U, VT, Custom);
+    setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Legal);
+    setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Legal);
+    setOperationAction(ISD::TRUNCATE_USAT_U, VT, Legal);
   }
 
   if (Subtarget->hasSME()) {

>From e96657b1427518c5b4a6a840025a3cdd9b16b608 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 02:46:34 +0900
Subject: [PATCH 08/31] aarch64: adding v2i64 -> v2i32 pattern

---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  6 ++
 llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 16 ++---
 llvm/test/CodeGen/AArch64/qmovn.ll            | 62 ++++---------------
 4 files changed, 23 insertions(+), 65 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 309b5945a6f4a2..2ebe2ff2712d6d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1410,7 +1410,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }
 
-  for (MVT VT : {MVT::v8i16, MVT::v4i32}) {
+  for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
     setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Legal);
     setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Legal);
     setOperationAction(ISD::TRUNCATE_USAT_U, VT, Legal);
@@ -28741,7 +28741,7 @@ bool AArch64TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
   case ISD::TRUNCATE_SSAT_S:
   case ISD::TRUNCATE_SSAT_U:
   case ISD::TRUNCATE_USAT_U:
-    if (VT == MVT::v8i8 || VT == MVT::v4i16)
+    if (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32)
       return true;
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ac42f9cb6eb63f..630411da9bbce7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5423,6 +5423,9 @@ def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
 // trunc(umin(X, 65535)) -> UQXTRN v4i16
 def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
           (UQXTNv4i16 V128:$Vn)>;
+// trunc(umin(X, 4294967295)) -> UQXTRN v2i32
+def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
+          (UQXTNv2i32 V128:$Vn)>;
 // trunc(smin(smax(X, -128), 128)) -> SQXTRN
 //  with reversed min/max
 def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
@@ -5434,6 +5437,9 @@ def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
 //  with reversed min/max
 def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
           (SQXTNv4i16 V128:$Vn)>;
+// trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
+def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
+          (SQXTNv4i16 V128:$Vn)>;
 // trunc(umin(smax(X, 0), 65535)) -> SQXTUN
 def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
           (SQXTUNv4i16 V128:$Vn)>;
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 0138bef9c38454..e73df1b87c5dc7 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -7,12 +7,8 @@
 define <2 x i32> @stest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -656,12 +652,8 @@ entry:
 define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzs w8, d0
-; CHECK-NEXT:    fcvtzs w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 0b19a9ff7e3dd9..9f7783f9b22a66 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -84,15 +84,7 @@ entry:
 define <2 x i32> @vqmovni64_smaxmin(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_smaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    dup v1.2d, x8
-; CHECK-NEXT:    mov x8, #-2147483648 // =0xffffffff80000000
-; CHECK-NEXT:    cmgt v2.2d, v1.2d, v0.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    dup v1.2d, x8
-; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp slt <2 x i64> %s0, <i64 2147483647, i64 2147483647>
@@ -106,15 +98,7 @@ entry:
 define <2 x i32> @vqmovni64_sminmax(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_sminmax:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, #-2147483648 // =0xffffffff80000000
-; CHECK-NEXT:    dup v1.2d, x8
-; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    dup v1.2d, x8
-; CHECK-NEXT:    cmgt v2.2d, v1.2d, v0.2d
-; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    sqxtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp sgt <2 x i64> %s0, <i64 -2147483648, i64 -2147483648>
@@ -128,11 +112,7 @@ entry:
 define <2 x i32> @vqmovni64_umaxmin(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_umaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT:    cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    uqxtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
@@ -174,16 +154,9 @@ entry:
 define <4 x i32> @signed_minmax_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_minmax_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
+; CHECK-NEXT:    sqxtn v1.4h, v1.4s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    mov x8, #-2147483648 // =0xffffffff80000000
-; CHECK-NEXT:    cmgt v3.2d, v2.2d, v1.2d
-; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    cmgt v3.2d, v1.2d, v2.2d
-; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %y, <2 x i64> <i64 2147483647, i64 2147483647>)
@@ -226,16 +199,9 @@ entry:
 define <4 x i32> @signed_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, #-2147483648 // =0xffffffff80000000
+; CHECK-NEXT:    sqxtn v1.4h, v1.4s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    mov w8, #2147483647 // =0x7fffffff
-; CHECK-NEXT:    cmgt v3.2d, v1.2d, v2.2d
-; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
-; CHECK-NEXT:    dup v2.2d, x8
-; CHECK-NEXT:    cmgt v3.2d, v2.2d, v1.2d
-; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %max = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> <i64 -2147483648, i64 -2147483648>)
@@ -276,12 +242,9 @@ entry:
 define <4 x i32> @unsigned_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: unsigned_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT:    uqxtn v1.2s, v1.2d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    cmhi v2.2d, v2.2d, v1.2d
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    orn v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %min = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %y, <2 x i64> <i64 4294967295, i64 4294967295>)
@@ -332,13 +295,10 @@ define <4 x i32> @us_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: us_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmgt v2.2d, v1.2d, #0
-; CHECK-NEXT:    movi v3.2d, #0x000000ffffffff
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    cmgt v2.2d, v3.2d, v1.2d
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    orn v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    xtn2 v0.4s, v1.2d
+; CHECK-NEXT:    uqxtn v1.2s, v1.2d
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %max = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> zeroinitializer)

>From ab662250b64e96914a2f045db84d160a23040e48 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 03:03:20 +0900
Subject: [PATCH 09/31] aarch64: remove unused comment

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 630411da9bbce7..439f8dfc3c15dd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5427,14 +5427,12 @@ def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
 def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
           (UQXTNv2i32 V128:$Vn)>;
 // trunc(smin(smax(X, -128), 128)) -> SQXTRN
-//  with reversed min/max
 def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
           (SQXTNv8i8 V128:$Vn)>;
 // trunc(umin(smax(X, 0), 255)) -> SQXTUN
 def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
           (SQXTUNv8i8 V128:$Vn)>;
 // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
-//  with reversed min/max
 def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
           (SQXTNv4i16 V128:$Vn)>;
 // trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
@@ -5456,7 +5454,6 @@ def : Pat<(v8i16 (concat_vectors
           (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
 // concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
-// with reversed min/max
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncssat_s (v8i16 V128:$Vn))))),
@@ -5468,7 +5465,6 @@ def : Pat<(v16i8 (concat_vectors
           (SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
 // concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
-// with reversed min/max
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
                  (v4i16 (truncssat_s (v4i32 V128:$Vn))))),

>From 045bce777f0aa36f46090127c1add59a5d488a6e Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 03:34:20 +0900
Subject: [PATCH 10/31] Functionalized MatchMinMax

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 60 ++++++++-----------
 1 file changed, 26 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 806ec8ac547ab6..08a05ab283fb01 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14952,6 +14952,21 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
   return SDValue();
 }
 
+// Match min/max and return limit value as a parameter.
+static SDValue matchMinMax(SDValue V, unsigned Opcode, APInt &Limit,
+                           bool Signed) {
+  if (V.getOpcode() == Opcode) {
+    if (Signed) {
+      APInt C;
+      if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) &&
+          C == Limit)
+        return V.getOperand(0);
+    } else if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
+      return V.getOperand(0);
+  }
+  return SDValue();
+}
+
 /// Detect patterns of truncation with unsigned saturation:
 ///
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
@@ -14965,16 +14980,8 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
   assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
          "Unexpected types for truncate operation");
 
-  // Match min/max and return limit value as a parameter.
-  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
-    if (V.getOpcode() == Opcode &&
-        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
-      return V.getOperand(0);
-    return SDValue();
-  };
-
   APInt C1, C2;
-  if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
+  if (SDValue UMin = matchMinMax(In, ISD::UMIN, C2, /*Signed*/ false))
     // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
     // the element size of the destination type.
     if (C2.isMask(VT.getScalarSizeInBits()))
@@ -14997,25 +15004,18 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
   unsigned NumSrcBits = In.getScalarValueSizeInBits();
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  auto MatchMinMax = [](SDValue V, unsigned Opcode,
-                        const APInt &Limit) -> SDValue {
-    APInt C;
-    if (V.getOpcode() == Opcode &&
-        ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
-      return V.getOperand(0);
-    return SDValue();
-  };
-
   APInt SignedMax, SignedMin;
   SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
   SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
-  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax)) {
-    if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin)) {
+  if (SDValue SMin = matchMinMax(In, ISD::SMIN, SignedMax, /*Signed*/ true)) {
+    if (SDValue SMax =
+            matchMinMax(SMin, ISD::SMAX, SignedMin, /*Signed*/ true)) {
       return SMax;
     }
   }
-  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin)) {
-    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax)) {
+  if (SDValue SMax = matchMinMax(In, ISD::SMAX, SignedMin, /*Signed*/ true)) {
+    if (SDValue SMin =
+            matchMinMax(SMax, ISD::SMIN, SignedMax, /*Signed*/ true)) {
       return SMin;
     }
   }
@@ -15038,22 +15038,14 @@ static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
   assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
          "Unexpected types for truncate operation");
 
-  // Match min/max and return limit value as a parameter.
-  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
-    if (V.getOpcode() == Opcode &&
-        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
-      return V.getOperand(0);
-    return SDValue();
-  };
-
   APInt C1, C2;
-  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
-    if (MatchMinMax(SMin, ISD::SMAX, C1))
+  if (SDValue SMin = matchMinMax(In, ISD::SMIN, C2, /*Signed*/ false))
+    if (matchMinMax(SMin, ISD::SMAX, C1, /*Signed*/ false))
       if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
         return SMin;
 
-  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
-    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
+  if (SDValue SMax = matchMinMax(In, ISD::SMAX, C1, /*Signed*/ false))
+    if (SDValue SMin = matchMinMax(SMax, ISD::SMIN, C2, /*Signed*/ false))
       if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
           C2.uge(C1))
         return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));

>From dbd711330e104d4496cdf5c61fff58a10715fd51 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sun, 28 Jul 2024 23:26:46 +0900
Subject: [PATCH 11/31] aarch64: fix wrong pattern

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 2 +-
 llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 4 ++--
 llvm/test/CodeGen/AArch64/qmovn.ll            | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 439f8dfc3c15dd..7d94697460c87a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5437,7 +5437,7 @@ def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
           (SQXTNv4i16 V128:$Vn)>;
 // trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
 def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
-          (SQXTNv4i16 V128:$Vn)>;
+          (SQXTNv2i32 V128:$Vn)>;
 // trunc(umin(smax(X, 0), 65535)) -> SQXTUN
 def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
           (SQXTUNv4i16 V128:$Vn)>;
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index e73df1b87c5dc7..ca629cee24d36d 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -8,7 +8,7 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    sqxtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -653,7 +653,7 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    sqxtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 9f7783f9b22a66..dcbd4d235102fa 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -84,7 +84,7 @@ entry:
 define <2 x i32> @vqmovni64_smaxmin(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_smaxmin:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    sqxtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp slt <2 x i64> %s0, <i64 2147483647, i64 2147483647>
@@ -98,7 +98,7 @@ entry:
 define <2 x i32> @vqmovni64_sminmax(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_sminmax:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    sqxtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %c1 = icmp sgt <2 x i64> %s0, <i64 -2147483648, i64 -2147483648>
@@ -154,7 +154,7 @@ entry:
 define <4 x i32> @signed_minmax_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_minmax_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sqxtn v1.4h, v1.4s
+; CHECK-NEXT:    sqxtn v1.2s, v1.2d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
@@ -199,7 +199,7 @@ entry:
 define <4 x i32> @signed_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sqxtn v1.4h, v1.4s
+; CHECK-NEXT:    sqxtn v1.2s, v1.2d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret

>From 09b61f217a49dc55a3ac31b6c2f4496daa687ead Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 30 Jul 2024 22:22:45 +0900
Subject: [PATCH 12/31] DAG: Adding cases and changing the structure of
 truncate_usat_u

- add pattern for `truncate_usat_u`.
- split the if-else to detect SSAT_S and SSAT_U so that
they are checked independently.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 ++++++++++---------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 08a05ab283fb01..8e2df7c070cca6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14981,11 +14981,19 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
          "Unexpected types for truncate operation");
 
   APInt C1, C2;
-  if (SDValue UMin = matchMinMax(In, ISD::UMIN, C2, /*Signed*/ false))
+  if (SDValue UMin = matchMinMax(In, ISD::UMIN, C2, /*Signed*/ false)) {
     // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
     // the element size of the destination type.
-    if (C2.isMask(VT.getScalarSizeInBits()))
-      return UMin;
+    if (C2.isMask(VT.getScalarSizeInBits())) {
+      // (truncate (umin (smax (x, C1), C2)))
+      // where C1 == 0, C2 is unsigned max of destination type.
+      if (SDValue SMax = matchMinMax(UMin, ISD::SMAX, C1, /*Signed*/ false)) {
+        if (C1.isZero())
+          return SMax;
+      } else
+        return UMin;
+    }
+  }
 
   return SDValue();
 }
@@ -15023,27 +15031,24 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
 }
 
 /// Detect patterns of truncation with unsigned saturation:
-///
-/// (truncate (smin (smax (x, C1), C2)) to dest_type),
-/// where C1 >= 0 and C2 is unsigned max of destination type.
-///
-/// (truncate (smax (smin (x, C2), C1)) to dest_type)
-/// where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
-///
 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                   const SDLoc &DL) {
   EVT InVT = In.getValueType();
-
+  
   // Saturation with truncation. We truncate from InVT to VT.
   assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
          "Unexpected types for truncate operation");
 
   APInt C1, C2;
+  // (truncate (smin (smax (x, C1), C2)) to dest_type),
+  // where C1 >= 0 and C2 is unsigned max of destination type.
   if (SDValue SMin = matchMinMax(In, ISD::SMIN, C2, /*Signed*/ false))
     if (matchMinMax(SMin, ISD::SMAX, C1, /*Signed*/ false))
       if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
         return SMin;
 
+  // (truncate (smax (smin (x, C2), C1)) to dest_type)
+  // where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
   if (SDValue SMax = matchMinMax(In, ISD::SMAX, C1, /*Signed*/ false))
     if (SDValue SMin = matchMinMax(SMax, ISD::SMIN, C2, /*Signed*/ false))
       if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
@@ -15058,21 +15063,18 @@ static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
                                SelectionDAG &DAG) {
   if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
     if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_S, SrcVT) &&
-        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_S, VT)) {
+        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_S, VT))
       if (SDValue SSatVal = detectSSatSPattern(Src, VT))
         return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
-    } else if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_U, SrcVT) &&
-               TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_U, VT)) {
+    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_U, SrcVT) &&
+        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_U, VT))
       if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
-        return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
-    }
+        return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
   } else if (Src.getOpcode() == ISD::UMIN) {
     if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_USAT_U, SrcVT) &&
-        TLI.isTypeDesirableForOp(ISD::TRUNCATE_USAT_U, VT)) {
-      if (SDValue USatVal = detectUSatUPattern(Src, VT)) {
+        TLI.isTypeDesirableForOp(ISD::TRUNCATE_USAT_U, VT))
+      if (SDValue USatVal = detectUSatUPattern(Src, VT))
         return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
-      }
-    }
   }
 
   return SDValue();

>From 4dee3ccd8471279e5cfc0c4386f77fa8451041bf Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 30 Jul 2024 22:23:28 +0900
Subject: [PATCH 13/31] aarch64: add more patterns

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 45 +++++++++++++++------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7d94697460c87a..3fe27c4be56776 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5429,51 +5429,72 @@ def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
 // trunc(smin(smax(X, -128), 128)) -> SQXTRN
 def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
           (SQXTNv8i8 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 255)) -> SQXTUN
-def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
-          (SQXTUNv8i8 V128:$Vn)>;
 // trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
 def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
           (SQXTNv4i16 V128:$Vn)>;
 // trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
 def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
           (SQXTNv2i32 V128:$Vn)>;
+// trunc(umin(smax(X, 0), 255)) -> SQXTUN
+def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
+          (SQXTUNv8i8 V128:$Vn)>;
 // trunc(umin(smax(X, 0), 65535)) -> SQXTUN
 def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
           (SQXTUNv4i16 V128:$Vn)>;
+// trunc(umin(smax(X, 0), 4294967295)) -> SQXTUN
+def : Pat<(v2i32 (truncssat_u (v2i64 V128:$Vn))),
+          (SQXTUNv2i32 V128:$Vn)>;
 
-// concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTRN(Vd, Vn)
+// truncusat_u
+// concat_vectors(Vd, trunc(umin(X, 255))) ~> UQXTRN(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncusat_u (v8i16 V128:$Vn))))),
           (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTRN(Vd, Vn)
+// concat_vectors(Vd, trunc(umin(X, 65535))) ~> UQXTRN(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
                  (v4i16 (truncusat_u (v4i32 V128:$Vn))))),
           (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(umin(X, 4294967295))) ~> UQXTRN(Vd, Vn)
+def : Pat<(v4i32 (concat_vectors
+                 (v2i32 V64:$Vd),
+                 (v2i32 (truncusat_u (v2i64 V128:$Vn))))),
+          (UQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
-// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
+// truncssat_s
+// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127)) ~> SQXTN2(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncssat_s (v8i16 V128:$Vn))))),
           (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767)) ~> SQXTN2(Vd, Vn)
+def : Pat<(v8i16 (concat_vectors
+                 (v4i16 V64:$Vd),
+                 (v4i16 (truncssat_s (v4i32 V128:$Vn))))),
+          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(smin(smax Vm, -2147483648), 2147483647) ~> SQXTN2(Vd, Vn)
+def : Pat<(v4i32 (concat_vectors
+                 (v2i32 V64:$Vd),
+                 (v2i32 (truncssat_s (v2i64 V128:$Vn))))),
+          (SQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+
+// truncssat_u
 // concat_vectors(Vd, trunc(smin(smax Vm, 0), 127) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncssat_u (v8i16 V128:$Vn))))),
           (SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-
-// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncssat_s (v4i32 V128:$Vn))))),
-          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 // concat_vectors(Vd, trunc(smin(smax Vm, 0), 32767) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
                  (v4i16 (truncssat_u (v4i32 V128:$Vn))))),
           (SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+// concat_vectors(Vd, trunc(smin(smax Vm, 0), 2147483647) ~> SQXTUN2(Vd, Vn)
+def : Pat<(v4i32 (concat_vectors
+                 (v2i32 V64:$Vd),
+                 (v2i32 (truncssat_u (v2i64 V128:$Vn))))),
+          (SQXTUNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
 // Select BSWAP vector instructions into REV instructions
 def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))), 

>From d28793863182f8a79e3b383fdd632967a1e66391 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 30 Jul 2024 22:47:37 +0900
Subject: [PATCH 14/31] aarch64: update test affected by previous commit

Tests changed by the affected of the updated pattern in
the previous commit.
---
 llvm/test/CodeGen/AArch64/qmovn.ll | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index dcbd4d235102fa..2247c285b2afb0 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -154,9 +154,8 @@ entry:
 define <4 x i32> @signed_minmax_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_minmax_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sqxtn v1.2s, v1.2d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
   %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %y, <2 x i64> <i64 2147483647, i64 2147483647>)
@@ -199,9 +198,8 @@ entry:
 define <4 x i32> @signed_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sqxtn v1.2s, v1.2d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
   %max = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> <i64 -2147483648, i64 -2147483648>)
@@ -242,9 +240,8 @@ entry:
 define <4 x i32> @unsigned_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: unsigned_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    uqxtn v1.2s, v1.2d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    uqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
   %min = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %y, <2 x i64> <i64 4294967295, i64 4294967295>)
@@ -255,14 +252,10 @@ entry:
 
 ; Test the (concat_vectors (X), (trunc(umin(smax(Y, 0), 2^n))))) pattern.
 
-; TODO: %min is a value between 0 and 255 and is within the unsigned range of i8.
-; So it is saturated truncate. we have an optimization opportunity.
 define <16 x i8> @us_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: us_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    smax v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    uqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -273,14 +266,10 @@ entry:
   ret <16 x i8> %shuffle
 }
 
-; TODO: %min is a value between 0 and 65535 and is within the unsigned range of i16.
-; So it is saturated. we have an optimization opportunity.
 define <8 x i16> @us_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: us_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -294,11 +283,8 @@ entry:
 define <4 x i32> @us_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: us_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v2.2d, v1.2d, #0
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    uqxtn v1.2s, v1.2d
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    uqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
   %max = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> zeroinitializer)

>From e0e8176f0df3650a2577eb5b6374d2943db8ed53 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 30 Jul 2024 22:50:37 +0900
Subject: [PATCH 15/31] aarch64: add tests for TRUNCATE_SSAT_U

---
 llvm/test/CodeGen/AArch64/qmovn.ll | 146 +++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 2247c285b2afb0..98ed9b2c407c2a 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -109,6 +109,102 @@ entry:
   ret <2 x i32> %t
 }
 
+define <2 x i32> @vqmovni64_smaxmin_u(<2 x i64> %s0) {
+; CHECK-LABEL: vqmovni64_smaxmin_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmgt v1.2d, v0.2d, #0
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    sqxtun v0.2s, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c1 = icmp slt <2 x i64> %s0, <i64 4294967295, i64 4294967295>
+  %s1 = select <2 x i1> %c1, <2 x i64> %s0, <2 x i64> <i64 4294967295, i64 4294967295>
+  %c2 = icmp sgt <2 x i64> %s1, zeroinitializer
+  %s2 = select <2 x i1> %c2, <2 x i64> %s1, <2 x i64> zeroinitializer
+  %t = trunc <2 x i64> %s2 to <2 x i32>
+  ret <2 x i32> %t
+}
+
+define <2 x i32> @vqmovni64_sminmax_u(<2 x i64> %s0) {
+; CHECK-LABEL: vqmovni64_sminmax_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmgt v1.2d, v0.2d, #0
+; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    sqxtun v0.2s, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %c1 = icmp sgt <2 x i64> %s0, zeroinitializer
+  %s1 = select <2 x i1> %c1, <2 x i64> %s0, <2 x i64> zeroinitializer
+  %c2 = icmp slt <2 x i64> %s1, <i64 4294967295, i64 4294967295>
+  %s2 = select <2 x i1> %c2, <2 x i64> %s1, <2 x i64> <i64 4294967295, i64 4294967295>
+  %t = trunc <2 x i64> %s2 to <2 x i32>
+  ret <2 x i32> %t
+}
+
+define <4 x i16> @vqmovni32_smaxmin_u(<4 x i32> %s0) {
+; CHECK-LABEL: vqmovni32_smaxmin_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sqxtun v0.4h, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c1 = icmp slt <4 x i32> %s0, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %s1 = select <4 x i1> %c1, <4 x i32> %s0, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+  %c2 = icmp sgt <4 x i32> %s1, zeroinitializer
+  %s2 = select <4 x i1> %c2, <4 x i32> %s1, <4 x i32> zeroinitializer
+  %t = trunc <4 x i32> %s2 to <4 x i16>
+  ret <4 x i16> %t
+}
+
+define <4 x i16> @vqmovni32_sminmax_u(<4 x i32> %s0) {
+; CHECK-LABEL: vqmovni32_sminmax_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sqxtun v0.4h, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %c1 = icmp sgt <4 x i32> %s0, zeroinitializer
+  %s1 = select <4 x i1> %c1, <4 x i32> %s0, <4 x i32> zeroinitializer
+  %c2 = icmp slt <4 x i32> %s1, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %s2 = select <4 x i1> %c2, <4 x i32> %s1, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+  %t = trunc <4 x i32> %s2 to <4 x i16>
+  ret <4 x i16> %t
+}
+
+define <8 x i8> @vqmovni16_smaxmin_u(<8 x i16> %s0) {
+; CHECK-LABEL: vqmovni16_smaxmin_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sqxtun v0.8b, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %c1 = icmp slt <8 x i16> %s0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %s1 = select <8 x i1> %c1, <8 x i16> %s0, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %c2 = icmp sgt <8 x i16> %s1, zeroinitializer
+  %s2 = select <8 x i1> %c2, <8 x i16> %s1, <8 x i16> zeroinitializer
+  %t = trunc <8 x i16> %s2 to <8 x i8>
+  ret <8 x i8> %t
+}
+
+define <8 x i8> @vqmovni16_sminmax_u(<8 x i16> %s0) {
+; CHECK-LABEL: vqmovni16_sminmax_u:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sqxtun v0.8b, v0.8h
+; CHECK-NEXT:    ret
+entry:
+  %c1 = icmp sgt <8 x i16> %s0, zeroinitializer
+  %s1 = select <8 x i1> %c1, <8 x i16> %s0, <8 x i16> zeroinitializer
+  %c2 = icmp slt <8 x i16> %s1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %s2 = select <8 x i1> %c2, <8 x i16> %s1, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %t = trunc <8 x i16> %s2 to <8 x i8>
+  ret <8 x i8> %t
+}
+
 define <2 x i32> @vqmovni64_umaxmin(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_umaxmin:
 ; CHECK:       // %bb.0: // %entry
@@ -293,3 +389,53 @@ entry:
   %shuffle = shufflevector <2 x i32> %x, <2 x i32> %trunc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %shuffle
 }
+
+; Test the (concat_vectors (X), (trunc(smin(smax(Y, 0), 2^n))))) pattern.
+
+define <16 x i8> @sminsmax_range_unsigned_i16_to_i8(<8 x i8> %x, <8 x i16> %y) {
+; CHECK-LABEL: sminsmax_range_unsigned_i16_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smax v1.8h, v1.8h, v2.8h
+; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %min = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %y, <8 x i16> zeroinitializer)
+  %max = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %min, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>)
+  %trunc = trunc <8 x i16> %max to <8 x i8>
+  %shuffle = shufflevector <8 x i8> %x, <8 x i8> %trunc, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @sminsmax_range_unsigned_i32_to_i16(<4 x i16> %x, <4 x i32> %y) {
+; CHECK-LABEL: sminsmax_range_unsigned_i32_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %smax = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> zeroinitializer)
+  %smin = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %smax, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+  %trunc = trunc <4 x i32> %smin to <4 x i16>
+  %shuffle = shufflevector <4 x i16> %x, <4 x i16> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <4 x i32> @sminsmax_range_unsigned_i64_to_i32(<2 x i32> %x, <2 x i64> %y) {
+; CHECK-LABEL: sminsmax_range_unsigned_i64_to_i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmgt v2.2d, v1.2d, #0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %smax = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> zeroinitializer)
+  %smin = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %smax, <2 x i64> <i64 4294967295, i64 4294967295>)
+  %trunc = trunc <2 x i64> %smin to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %x, <2 x i32> %trunc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuffle
+}

>From 9f8691bfb3da20f1082ee1f3e28e3d338b873a32 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 1 Aug 2024 01:24:10 +0900
Subject: [PATCH 16/31] DAG: fix misused pattern `trunc (umin(smax(x, C1),
 C2))`

- In a previous commit, `trunc (umin(smax(x, C1), C2))` was
combined into `truncusat_u`, which was a mistake, so changed
it to combined into `truncssat_u`.

- Changed `detectSSatUPattern` function to handle
`umin(smax(x, C1), C2)`
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 69 +++++++++++--------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8e2df7c070cca6..1adf7e77db6024 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14980,19 +14980,12 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
   assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
          "Unexpected types for truncate operation");
 
-  APInt C1, C2;
-  if (SDValue UMin = matchMinMax(In, ISD::UMIN, C2, /*Signed*/ false)) {
+  APInt Max;
+  if (SDValue UMin = matchMinMax(In, ISD::UMIN, Max, /*Signed*/ false)) {
     // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
     // the element size of the destination type.
-    if (C2.isMask(VT.getScalarSizeInBits())) {
-      // (truncate (umin (smax (x, C1), C2)))
-      // where C1 == 0, C2 is unsigned max of destination type.
-      if (SDValue SMax = matchMinMax(UMin, ISD::SMAX, C1, /*Signed*/ false)) {
-        if (C1.isZero())
-          return SMax;
-      } else
-        return UMin;
-    }
+    if (Max.isMask(VT.getScalarSizeInBits()))
+      return UMin;
   }
 
   return SDValue();
@@ -15034,26 +15027,44 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                   const SDLoc &DL) {
   EVT InVT = In.getValueType();
-  
+
   // Saturation with truncation. We truncate from InVT to VT.
   assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
          "Unexpected types for truncate operation");
 
-  APInt C1, C2;
-  // (truncate (smin (smax (x, C1), C2)) to dest_type),
-  // where C1 >= 0 and C2 is unsigned max of destination type.
-  if (SDValue SMin = matchMinMax(In, ISD::SMIN, C2, /*Signed*/ false))
-    if (matchMinMax(SMin, ISD::SMAX, C1, /*Signed*/ false))
-      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
-        return SMin;
-
-  // (truncate (smax (smin (x, C2), C1)) to dest_type)
-  // where C1 >= 0, C2 is unsigned max of destination type and C1 <= C2.
-  if (SDValue SMax = matchMinMax(In, ISD::SMAX, C1, /*Signed*/ false))
-    if (SDValue SMin = matchMinMax(SMax, ISD::SMIN, C2, /*Signed*/ false))
-      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
-          C2.uge(C1))
-        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
+  APInt Min, Max;
+  SDValue SMax, SMin, UMin, First;
+  // (truncate (smin (smax (x, Min), Max)) to dest_type),
+  // (truncate (smax (smin (x, Max), Min)) to dest_type)
+  // (truncate (umin (smax (x, Min), Max)) to dest_type)
+  // where Min >= 0, Max is unsigned max of destination type and Min <= Max.
+  if (First = SMax = matchMinMax(In, ISD::SMAX, Min, /*Signed*/ false))
+    SMin = matchMinMax(First, ISD::SMIN, Max, /*Signed*/ false);
+  else if (First = SMin = matchMinMax(In, ISD::SMIN, Max, /*Signed*/ false))
+    SMax = matchMinMax(First, ISD::SMAX, Min, /*Signed*/ false);
+  else if (First = UMin = matchMinMax(In, ISD::UMIN, Max, /*Signed*/ false))
+    SMax = matchMinMax(UMin, ISD::SMAX, Min, /*Signed*/ false);
+
+  if (SMax && Min.isNonNegative() && Max.isMask(VT.getScalarSizeInBits())) {
+    if (SMin) {
+      if (Min.isZero()) {
+        if (isa<ConstantSDNode>(First.getOperand(0)))
+          return First.getOperand(1);
+        return First.getOperand(0);
+      } else {
+        if (First == SMin)
+          return SMin;
+        else if (Max.uge(Min))
+          return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
+      }
+    } else if (UMin) {
+      if (Min.isZero()) {
+        if (isa<ConstantSDNode>(First.getOperand(0)))
+          return First.getOperand(1);
+        return First.getOperand(0);
+      }
+    }
+  }
 
   return SDValue();
 }
@@ -15071,6 +15082,10 @@ static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
       if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
         return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
   } else if (Src.getOpcode() == ISD::UMIN) {
+    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_U, SrcVT) &&
+        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_U, VT))
+      if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
+        return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
     if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_USAT_U, SrcVT) &&
         TLI.isTypeDesirableForOp(ISD::TRUNCATE_USAT_U, VT))
       if (SDValue USatVal = detectUSatUPattern(Src, VT))

>From 7a6f47961041050fb20a86a2a250d5bb17e8e6bc Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 1 Aug 2024 01:24:25 +0900
Subject: [PATCH 17/31] aarch64: update tests

changed since commit 9f8691bfb3da20f1082ee1f3e28e3d338b873a32
---
 llvm/test/CodeGen/AArch64/qmovn.ll | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 98ed9b2c407c2a..65d148d44844b1 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -112,8 +112,6 @@ entry:
 define <2 x i32> @vqmovni64_smaxmin_u(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_smaxmin_u:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v1.2d, v0.2d, #0
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sqxtun v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -128,8 +126,6 @@ entry:
 define <2 x i32> @vqmovni64_sminmax_u(<2 x i64> %s0) {
 ; CHECK-LABEL: vqmovni64_sminmax_u:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v1.2d, v0.2d, #0
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    sqxtun v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -144,8 +140,6 @@ entry:
 define <4 x i16> @vqmovni32_smaxmin_u(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_smaxmin_u:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    sqxtun v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -160,8 +154,6 @@ entry:
 define <4 x i16> @vqmovni32_sminmax_u(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_sminmax_u:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    sqxtun v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -176,8 +168,6 @@ entry:
 define <8 x i8> @vqmovni16_smaxmin_u(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_smaxmin_u:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    sqxtun v0.8b, v0.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -192,8 +182,6 @@ entry:
 define <8 x i8> @vqmovni16_sminmax_u(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_sminmax_u:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    smax v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    sqxtun v0.8b, v0.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -352,7 +340,7 @@ define <16 x i8> @us_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: us_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    uqxtn2 v0.16b, v1.8h
+; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
   %max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %y, <8 x i16> zeroinitializer)
@@ -366,7 +354,7 @@ define <8 x i16> @us_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: us_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
+; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
   %max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> zeroinitializer)
@@ -380,7 +368,7 @@ define <4 x i32> @us_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: us_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    uqxtn2 v0.4s, v1.2d
+; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
   %max = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> zeroinitializer)
@@ -395,9 +383,7 @@ entry:
 define <16 x i8> @sminsmax_range_unsigned_i16_to_i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i16_to_i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    smax v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -411,9 +397,7 @@ entry:
 define <8 x i16> @sminsmax_range_unsigned_i32_to_i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i32_to_i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0000000000000000
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
 ; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -427,9 +411,7 @@ entry:
 define <4 x i32> @sminsmax_range_unsigned_i64_to_i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i64_to_i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v2.2d, v1.2d, #0
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
 ; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:

>From e54a1f639abeb7effde0d93c606b9bdd08180104 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 1 Aug 2024 16:01:34 +0900
Subject: [PATCH 18/31] riscv: update tests

changed since commit 9f8691bfb3da20f1082ee1f3e28e3d338b873a32
---
 .../RISCV/rvv/fixed-vectors-trunc-sat-clip.ll | 32 +++------
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll     | 68 +++++++------------
 .../RISCV/rvv/trunc-sat-clip-sdnode.ll        | 32 +++------
 .../RISCV/rvv/trunc-select-to-max-usat.ll     | 57 +++++-----------
 4 files changed, 60 insertions(+), 129 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
index 4e367bb0d70cd1..e2f540e991fd0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-sat-clip.ll
@@ -101,10 +101,8 @@ define void @trunc_sat_u8u16_notopt(ptr %x, ptr %y) {
 define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -119,10 +117,8 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -356,10 +352,8 @@ define void @trunc_sat_u32u64_min(ptr %x, ptr %y) {
 define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
@@ -374,10 +368,8 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vse32.v v10, (a1)
 ; CHECK-NEXT:    ret
@@ -445,10 +437,8 @@ define void @trunc_sat_u8u32_min(ptr %x, ptr %y) {
 define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
@@ -465,10 +455,8 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
@@ -544,10 +532,8 @@ define void @trunc_sat_u8u64_min(ptr %x, ptr %y) {
 define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_maxmin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -566,10 +552,8 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_minmax:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 3e2db3fa4685dd..ffbcebf621fd7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -113,7 +113,6 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -304,9 +303,6 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT:    vmax.vx v10, v10, zero
-; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    ret
 entry:
@@ -801,17 +797,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
@@ -944,9 +939,8 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT:    vmax.vx v8, v9, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v9, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -1139,7 +1133,6 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -2114,24 +2107,23 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
@@ -3473,7 +3465,6 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -3659,9 +3650,6 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; CHECK-V-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-V-NEXT:    vmax.vx v10, v10, zero
-; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    ret
 entry:
@@ -4151,17 +4139,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 2
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
@@ -4289,9 +4276,8 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-V-NEXT:    vmax.vx v8, v9, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
+; CHECK-V-NEXT:    vnclipu.wi v8, v9, 0
 ; CHECK-V-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -4479,7 +4465,6 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-V-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-V-NEXT:    vmax.vx v8, v8, zero
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-V-NEXT:    ret
@@ -5449,24 +5434,23 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-V-NEXT:    call __extendhfsf2
 ; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    vmv.s.x v10, a0
 ; CHECK-V-NEXT:    addi a0, sp, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 1
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-V-NEXT:    csrr a0, vlenb
 ; CHECK-V-NEXT:    slli a0, a0, 1
 ; CHECK-V-NEXT:    add a0, sp, a0
 ; CHECK-V-NEXT:    addi a0, a0, 16
-; CHECK-V-NEXT:    vl2r.v v10, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT:    vl2r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-V-NEXT:    vslideup.vi v8, v10, 4
-; CHECK-V-NEXT:    vmax.vx v10, v8, zero
+; CHECK-V-NEXT:    vslideup.vi v10, v8, 4
 ; CHECK-V-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT:    vnclipu.wi v8, v10, 0
 ; CHECK-V-NEXT:    csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
index 01a90d8a33b6ec..f43faadc532f26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-sat-clip-sdnode.ll
@@ -102,9 +102,7 @@ define void @trunc_sat_u8u16_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -120,9 +118,7 @@ define void @trunc_sat_u8u16_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u16_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl1re16.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -357,9 +353,7 @@ define void @trunc_sat_u32u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
@@ -375,9 +369,7 @@ define void @trunc_sat_u32u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u32u64_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vs2r.v v12, (a1)
 ; CHECK-NEXT:    ret
@@ -446,9 +438,7 @@ define void @trunc_sat_u8u32_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -466,9 +456,7 @@ define void @trunc_sat_u8u32_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u32_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl2re32.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -545,9 +533,7 @@ define void @trunc_sat_u8u64_maxmin(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_maxmin:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
@@ -567,9 +553,7 @@ define void @trunc_sat_u8u64_minmax(ptr %x, ptr %y) {
 ; CHECK-LABEL: trunc_sat_u8u64_minmax:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl4re64.v v8, (a0)
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
index 28d7588b9347a7..992ea8f8c18a5e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/trunc-select-to-max-usat.ll
@@ -4,9 +4,7 @@
 define <4 x i8> @test_v4i16_v4i8(<4 x i16> %x) {
 ; CHECK-LABEL: test_v4i16_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i16> %x, zeroinitializer
@@ -20,9 +18,7 @@ define <4 x i8> @test_v4i16_v4i8(<4 x i16> %x) {
 define <4 x i8> @test_v4i32_v4i8(<4 x i32> %x) {
 ; CHECK-LABEL: test_v4i32_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
@@ -38,9 +34,7 @@ define <4 x i8> @test_v4i32_v4i8(<4 x i32> %x) {
 define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -58,9 +52,7 @@ define <4 x i8> @test_v4i64_v4i8(<4 x i64> %x) {
 define <4 x i16> @test_v4i32_v4i16(<4 x i32> %x) {
 ; CHECK-LABEL: test_v4i32_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i32> %x, zeroinitializer
@@ -74,9 +66,7 @@ define <4 x i16> @test_v4i32_v4i16(<4 x i32> %x) {
 define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -92,10 +82,9 @@ define <4 x i16> @test_v4i64_v4i16(<4 x i64> %x) {
 define <4 x i32> @test_v4i64_v4i32(<4 x i64> %x) {
 ; CHECK-LABEL: test_v4i64_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v10, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %a = icmp sgt <4 x i64> %x, zeroinitializer
   %b = sext <4 x i1> %a to <4 x i64>
@@ -108,9 +97,7 @@ define <4 x i32> @test_v4i64_v4i32(<4 x i64> %x) {
 define <vscale x 4 x i8> @test_nxv4i16_nxv4i8(<vscale x 4 x i16> %x) {
 ; CHECK-LABEL: test_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i16> %x, zeroinitializer
@@ -124,9 +111,7 @@ define <vscale x 4 x i8> @test_nxv4i16_nxv4i8(<vscale x 4 x i16> %x) {
 define <vscale x 4 x i8> @test_nxv4i32_nxv4i8(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: test_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v10, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v10, 0
@@ -142,9 +127,7 @@ define <vscale x 4 x i8> @test_nxv4i32_nxv4i8(<vscale x 4 x i32> %x) {
 define <vscale x 4 x i8> @test_nxv4i64_nxv4i8(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
@@ -162,10 +145,9 @@ define <vscale x 4 x i8> @test_nxv4i64_nxv4i8(<vscale x 4 x i64> %x) {
 define <vscale x 4 x i16> @test_nxv4i32_nxv4i16(<vscale x 4 x i32> %x) {
 ; CHECK-LABEL: test_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vmax.vx v10, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v10, 0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnclipu.wi v10, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i32> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i32>
@@ -178,9 +160,7 @@ define <vscale x 4 x i16> @test_nxv4i32_nxv4i16(<vscale x 4 x i32> %x) {
 define <vscale x 4 x i16> @test_nxv4i64_nxv4i16(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v8, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v12, v8, 0
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vnclipu.wi v8, v12, 0
@@ -196,10 +176,9 @@ define <vscale x 4 x i16> @test_nxv4i64_nxv4i16(<vscale x 4 x i64> %x) {
 define <vscale x 4 x i32> @test_nxv4i64_nxv4i32(<vscale x 4 x i64> %x) {
 ; CHECK-LABEL: test_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vmax.vx v12, v8, zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vnclipu.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vnclipu.wi v12, v8, 0
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %a = icmp sgt <vscale x 4 x i64> %x, zeroinitializer
   %b = sext <vscale x 4 x i1> %a to <vscale x 4 x i64>

>From f81fd3d4b89ca96f8dce5a0fe14753d24b36445a Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 1 Aug 2024 20:03:35 +0900
Subject: [PATCH 19/31] aarch64: add tests for trunc between unsupported type

---
 llvm/test/CodeGen/AArch64/qmovn.ll | 111 +++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 65d148d44844b1..2685ea9fb5d202 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -421,3 +421,114 @@ entry:
   %shuffle = shufflevector <2 x i32> %x, <2 x i32> %trunc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %shuffle
 }
+
+; Type support varification - not supported with saturated value
+; i64 -> i16
+define <4 x i16> @sminsmax_range_unsigned_i64_to_i16(<2 x i16> %x, <2 x i64> %y) {
+; CHECK-LABEL: sminsmax_range_unsigned_i64_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmgt v2.2d, v1.2d, #0
+; CHECK-NEXT:    movi v3.2d, #0x0000000000ffff
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    cmgt v2.2d, v3.2d, v1.2d
+; CHECK-NEXT:    bif v1.16b, v3.16b, v2.16b
+; CHECK-NEXT:    xtn v1.2s, v1.2d
+; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+entry:
+  %smax = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> zeroinitializer)
+  %smin = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %smax, <2 x i64> <i64 65535, i64 65535>)
+  %trunc = trunc <2 x i64> %smin to <2 x i16>
+  %shuffle = shufflevector <2 x i16> %x, <2 x i16> %trunc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @sminsmax_range_signed_i64_to_i16(<2 x i16> %x, <2 x i64> %y) {
+; CHECK-LABEL: sminsmax_range_signed_i64_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    mov w8, #32767 // =0x7fff
+; CHECK-NEXT:    cmgt v3.2d, v1.2d, v2.2d
+; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT:    dup v2.2d, x8
+; CHECK-NEXT:    cmgt v3.2d, v2.2d, v1.2d
+; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT:    xtn v1.2s, v1.2d
+; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+entry:
+  %smax = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %y, <2 x i64> <i64 -32768, i64 -32768>)
+  %smin = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %smax, <2 x i64> <i64 32767, i64 32767>)
+  %trunc = trunc <2 x i64> %smin to <2 x i16>
+  %shuffle = shufflevector <2 x i16> %x, <2 x i16> %trunc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle
+}
+
+define <4 x i16> @umin_range_unsigned_i64_to_i16(<2 x i16> %x, <2 x i64> %y) {
+; CHECK-LABEL: umin_range_unsigned_i64_to_i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0x0000000000ffff
+; CHECK-NEXT:    cmhi v3.2d, v2.2d, v1.2d
+; CHECK-NEXT:    bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT:    xtn v1.2s, v1.2d
+; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+entry:
+  %umin = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %y, <2 x i64> <i64 65535, i64 65535>)
+  %trunc = trunc <2 x i64> %umin to <2 x i16>
+  %shuffle = shufflevector <2 x i16> %x, <2 x i16> %trunc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %shuffle
+}
+
+; i32 -> i8
+define <8 x i8> @sminsmax_range_unsigned_i64_to_i8(<4 x i8> %x, <4 x i32> %y) {
+; CHECK-LABEL: sminsmax_range_unsigned_i64_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    xtn v1.4h, v1.4s
+; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+entry:
+  %smax = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> zeroinitializer)
+  %smin = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %smax, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+  %trunc = trunc <4 x i32> %smin to <4 x i8>
+  %shuffle = shufflevector <4 x i8> %x, <4 x i8> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @sminsmax_range_signed_i32_to_i8(<4 x i8> %x, <4 x i32> %y) {
+; CHECK-LABEL: sminsmax_range_signed_i32_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mvni v2.4s, #127
+; CHECK-NEXT:    smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    movi v2.4s, #127
+; CHECK-NEXT:    smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    xtn v1.4h, v1.4s
+; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+entry:
+  %smax = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
+  %smin = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %smax, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+  %trunc = trunc <4 x i32> %smin to <4 x i8>
+  %shuffle = shufflevector <4 x i8> %x, <4 x i8> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle
+}
+
+define <8 x i8> @umin_range_unsigned_i32_to_i8(<4 x i8> %x, <4 x i32> %y) {
+; CHECK-LABEL: umin_range_unsigned_i32_to_i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
+; CHECK-NEXT:    umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    xtn v1.4h, v1.4s
+; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+entry:
+  %umin = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %y, <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+  %trunc = trunc <4 x i32> %umin to <4 x i8>
+  %shuffle = shufflevector <4 x i8> %x, <4 x i8> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %shuffle
+}

>From f29a72dd70f5d852507197b4630716ef790b9ac1 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 2 Aug 2024 16:23:03 +0900
Subject: [PATCH 20/31] aarch64: update trunc[us]at comment

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 3fe27c4be56776..9ed8d3ee3dcee0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5446,51 +5446,43 @@ def : Pat<(v2i32 (truncssat_u (v2i64 V128:$Vn))),
           (SQXTUNv2i32 V128:$Vn)>;
 
 // truncusat_u
-// concat_vectors(Vd, trunc(umin(X, 255))) ~> UQXTRN(Vd, Vn)
+// concat_vectors(Vd, truncusat_u(Vn)) ~> UQXTRN(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncusat_u (v8i16 V128:$Vn))))),
           (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(umin(X, 65535))) ~> UQXTRN(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
                  (v4i16 (truncusat_u (v4i32 V128:$Vn))))),
           (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(umin(X, 4294967295))) ~> UQXTRN(Vd, Vn)
 def : Pat<(v4i32 (concat_vectors
                  (v2i32 V64:$Vd),
                  (v2i32 (truncusat_u (v2i64 V128:$Vn))))),
           (UQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
-// truncssat_s
-// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127)) ~> SQXTN2(Vd, Vn)
+// concat_vectors(Vd, truncssat_s(Vn)) ~> SQXTN2(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncssat_s (v8i16 V128:$Vn))))),
           (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767)) ~> SQXTN2(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
                  (v4i16 (truncssat_s (v4i32 V128:$Vn))))),
           (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(smin(smax Vm, -2147483648), 2147483647) ~> SQXTN2(Vd, Vn)
 def : Pat<(v4i32 (concat_vectors
                  (v2i32 V64:$Vd),
                  (v2i32 (truncssat_s (v2i64 V128:$Vn))))),
           (SQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
 
-// truncssat_u
-// concat_vectors(Vd, trunc(smin(smax Vm, 0), 127) ~> SQXTUN2(Vd, Vn)
+// concat_vectors(Vd, truncssat_u(Vn)) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (truncssat_u (v8i16 V128:$Vn))))),
           (SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(smin(smax Vm, 0), 32767) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v8i16 (concat_vectors
                  (v4i16 V64:$Vd),
                  (v4i16 (truncssat_u (v4i32 V128:$Vn))))),
           (SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-// concat_vectors(Vd, trunc(smin(smax Vm, 0), 2147483647) ~> SQXTUN2(Vd, Vn)
 def : Pat<(v4i32 (concat_vectors
                  (v2i32 V64:$Vd),
                  (v2i32 (truncssat_u (v2i64 V128:$Vn))))),

>From 2abb72a3b9ad52ecc25a4690febfd7e2b1570609 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 2 Aug 2024 22:07:15 +0900
Subject: [PATCH 21/31] DAG: refactoring VisitTRUNATE_USAT

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 +++++++++----------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1adf7e77db6024..9a1171b331a7df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14924,32 +14924,29 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
 
-  auto MatchFPTOINT = [&](SDValue Val) -> SDValue {
+  std::function<SDValue(SDValue)> MatchFPTOINT = [&](SDValue Val) -> SDValue {
     if (Val.getOpcode() == ISD::FP_TO_SINT ||
         Val.getOpcode() == ISD::FP_TO_UINT)
       return Val;
+    if (Val.getOpcode() == ISD::SMAX) {
+      for (unsigned I = 0; I < Val.getNumOperands(); I++)
+        if (SDValue Matched = MatchFPTOINT(Val.getOperand(I)))
+          return Matched;
+    }
     return SDValue();
   };
 
-  SDValue FPInstr;
-  if (N0.getOpcode() == ISD::SMAX) {
-    FPInstr = MatchFPTOINT(N0.getOperand(0));
-    if (!FPInstr)
-      FPInstr = MatchFPTOINT(N0.getOperand(1));
-  } else
-    FPInstr = MatchFPTOINT(N0);
-
-  if (FPInstr) {
-    EVT FPVT = FPInstr.getOperand(0).getValueType();
-    if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
-                                                          FPVT, VT))
-      return SDValue();
-    return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(FPInstr), VT,
-                       FPInstr.getOperand(0),
-                       DAG.getValueType(VT.getScalarType()));
-  }
+  SDValue FPInstr = MatchFPTOINT(N0);
+  if (!FPInstr)
+    return SDValue();
 
-  return SDValue();
+  EVT FPVT = FPInstr.getOperand(0).getValueType();
+  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
+                                                        FPVT, VT))
+    return SDValue();
+  return DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(FPInstr), VT,
+                     FPInstr.getOperand(0),
+                     DAG.getValueType(VT.getScalarType()));
 }
 
 // Match min/max and return limit value as a parameter.

>From 6fdaa2ddf556e6ab18ae7bad021217f256d84b2e Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Mon, 5 Aug 2024 20:11:44 +0900
Subject: [PATCH 22/31] DAG: matchMinMax split into matchMin and matchMax

Split the matchMinMax function into a matchMin/matchMax function.
The min/max values obtained by calling the matchMinMax function were
redundant because they were verified by separate logic.

By including that logic within the function, it is now changed to get
the value verified by calling this function.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 103 +++++++++---------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a1171b331a7df..5d7cd23f69f088 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14949,21 +14949,28 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
                      DAG.getValueType(VT.getScalarType()));
 }
 
-// Match min/max and return limit value as a parameter.
-static SDValue matchMinMax(SDValue V, unsigned Opcode, APInt &Limit,
-                           bool Signed) {
-  if (V.getOpcode() == Opcode) {
-    if (Signed) {
-      APInt C;
-      if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) &&
-          C == Limit)
-        return V.getOperand(0);
-    } else if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
+// Match min and return limit value as a parameter.
+static SDValue matchMin(SDValue V, APInt &Limit, APInt &C) {
+  if (V.getOpcode() == ISD::SMIN || V.getOpcode() == ISD::UMIN) {
+    if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
       return V.getOperand(0);
   }
   return SDValue();
 }
 
+// Match max and return limit value as a parameter.
+static SDValue matchMax(SDValue V, APInt &Limit, APInt &C) {
+  if (V.getOpcode() == ISD::SMAX) {
+    if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C)) {
+      if (Limit.isNegative() && C == Limit)
+        return V.getOperand(0);
+      else if (C.uge(Limit))
+        return V.getOperand(0);
+    }
+  }
+  return SDValue();
+}
+
 /// Detect patterns of truncation with unsigned saturation:
 ///
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
@@ -14971,18 +14978,17 @@ static SDValue matchMinMax(SDValue V, unsigned Opcode, APInt &Limit,
 /// not matched.
 ///
 static SDValue detectUSatUPattern(SDValue In, EVT VT) {
-  EVT InVT = In.getValueType();
-
+  unsigned NumDstBits = VT.getScalarSizeInBits();
+  unsigned NumSrcBits = In.getScalarValueSizeInBits();
   // Saturation with truncation. We truncate from InVT to VT.
-  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
-         "Unexpected types for truncate operation");
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
   APInt Max;
-  if (SDValue UMin = matchMinMax(In, ISD::UMIN, Max, /*Signed*/ false)) {
+  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
+  if (SDValue UMin = matchMin(In, UnsignedMax, Max)) {
     // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
     // the element size of the destination type.
-    if (Max.isMask(VT.getScalarSizeInBits()))
-      return UMin;
+    return UMin;
   }
 
   return SDValue();
@@ -15000,23 +15006,20 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
 static SDValue detectSSatSPattern(SDValue In, EVT VT) {
   unsigned NumDstBits = VT.getScalarSizeInBits();
   unsigned NumSrcBits = In.getScalarValueSizeInBits();
+  // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt SignedMax, SignedMin;
+  APInt SignedMax, SignedMin, C;
   SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
   SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
-  if (SDValue SMin = matchMinMax(In, ISD::SMIN, SignedMax, /*Signed*/ true)) {
-    if (SDValue SMax =
-            matchMinMax(SMin, ISD::SMAX, SignedMin, /*Signed*/ true)) {
+  if (SDValue SMin = matchMin(In, SignedMax, C))
+    if (SDValue SMax = matchMax(SMin, SignedMin, C))
       return SMax;
-    }
-  }
-  if (SDValue SMax = matchMinMax(In, ISD::SMAX, SignedMin, /*Signed*/ true)) {
-    if (SDValue SMin =
-            matchMinMax(SMax, ISD::SMIN, SignedMax, /*Signed*/ true)) {
+
+  if (SDValue SMax = matchMax(In, SignedMin, C))
+    if (SDValue SMin = matchMin(SMax, SignedMax, C))
       return SMin;
-    }
-  }
+
   return SDValue();
 }
 
@@ -15024,42 +15027,40 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                   const SDLoc &DL) {
   EVT InVT = In.getValueType();
-
+  unsigned NumDstBits = VT.getScalarSizeInBits();
+  unsigned NumSrcBits = In.getScalarValueSizeInBits();
   // Saturation with truncation. We truncate from InVT to VT.
-  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
-         "Unexpected types for truncate operation");
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt Min, Max;
+  APInt Min, Max, UnsignedMax, UnsignedMin;
   SDValue SMax, SMin, UMin, First;
+  UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
+  UnsignedMin = APInt::getMinValue(NumDstBits).zext(NumSrcBits);
   // (truncate (smin (smax (x, Min), Max)) to dest_type),
+  if (First = SMax = matchMax(In, UnsignedMin, Min))
+    SMin = matchMin(First, UnsignedMax, Max);
   // (truncate (smax (smin (x, Max), Min)) to dest_type)
+  else if (First = SMin = matchMin(In, UnsignedMax, Max))
+    SMax = matchMax(First, UnsignedMin, Min);
   // (truncate (umin (smax (x, Min), Max)) to dest_type)
-  // where Min >= 0, Max is unsigned max of destination type and Min <= Max.
-  if (First = SMax = matchMinMax(In, ISD::SMAX, Min, /*Signed*/ false))
-    SMin = matchMinMax(First, ISD::SMIN, Max, /*Signed*/ false);
-  else if (First = SMin = matchMinMax(In, ISD::SMIN, Max, /*Signed*/ false))
-    SMax = matchMinMax(First, ISD::SMAX, Min, /*Signed*/ false);
-  else if (First = UMin = matchMinMax(In, ISD::UMIN, Max, /*Signed*/ false))
-    SMax = matchMinMax(UMin, ISD::SMAX, Min, /*Signed*/ false);
-
-  if (SMax && Min.isNonNegative() && Max.isMask(VT.getScalarSizeInBits())) {
+  else if (First = UMin = matchMin(In, UnsignedMax, Max))
+    SMax = matchMax(UMin, UnsignedMin, Min);
+
+  if (SMax) {
     if (SMin) {
-      if (Min.isZero()) {
-        if (isa<ConstantSDNode>(First.getOperand(0)))
-          return First.getOperand(1);
+      // Min == 0, Max is unsigned max of destination type.
+      if (Min.isZero())
         return First.getOperand(0);
-      } else {
+      else {
+        // Min > 0, Max is unsigned max of destination type and Min <= Max.
         if (First == SMin)
           return SMin;
-        else if (Max.uge(Min))
-          return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
+        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
       }
     } else if (UMin) {
-      if (Min.isZero()) {
-        if (isa<ConstantSDNode>(First.getOperand(0)))
-          return First.getOperand(1);
+      // Min == 0, Max is unsigned max of destination type.
+      if (Min.isZero())
         return First.getOperand(0);
-      }
     }
   }
 

>From 7d5e569090dbf979dbc490a3daa4025df547bbc1 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Mon, 5 Aug 2024 20:12:02 +0900
Subject: [PATCH 23/31] DAG: Replace redundant code with function

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d7cd23f69f088..33220dbd9f4799 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15070,22 +15070,23 @@ static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
 static SDValue foldToSaturated(SDNode *N, EVT &VT, SDValue &Src, EVT &SrcVT,
                                SDLoc &DL, const TargetLowering &TLI,
                                SelectionDAG &DAG) {
+  auto AllowedTruncateSat = [&](unsigned Opc, EVT SrcVT, EVT VT) -> bool {
+    return (TLI.isOperationLegalOrCustom(Opc, SrcVT) &&
+            TLI.isTypeDesirableForOp(Opc, VT));
+  };
+
   if (Src.getOpcode() == ISD::SMIN || Src.getOpcode() == ISD::SMAX) {
-    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_S, SrcVT) &&
-        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_S, VT))
+    if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_S, SrcVT, VT))
       if (SDValue SSatVal = detectSSatSPattern(Src, VT))
         return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, VT, SSatVal);
-    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_U, SrcVT) &&
-        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_U, VT))
+    if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
       if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
         return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
   } else if (Src.getOpcode() == ISD::UMIN) {
-    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_SSAT_U, SrcVT) &&
-        TLI.isTypeDesirableForOp(ISD::TRUNCATE_SSAT_U, VT))
+    if (AllowedTruncateSat(ISD::TRUNCATE_SSAT_U, SrcVT, VT))
       if (SDValue SSatVal = detectSSatUPattern(Src, VT, DAG, DL))
         return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, VT, SSatVal);
-    if (TLI.isOperationLegalOrCustom(ISD::TRUNCATE_USAT_U, SrcVT) &&
-        TLI.isTypeDesirableForOp(ISD::TRUNCATE_USAT_U, VT))
+    if (AllowedTruncateSat(ISD::TRUNCATE_USAT_U, SrcVT, VT))
       if (SDValue USatVal = detectUSatUPattern(Src, VT))
         return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, VT, USatVal);
   }

>From 1d13eb76971ec17806060e618c64d37e0f5e23fa Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 6 Aug 2024 15:36:07 +0900
Subject: [PATCH 24/31] DAG: fix misused comment

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 33220dbd9f4799..0c5f1a7f0ea3c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14995,12 +14995,12 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
 }
 
 /// Detect patterns of truncation with signed saturation:
-/// (truncate (smin ((smax (x, signed_min_of_dest_type)),
+/// (truncate (smin (smax (x, signed_min_of_dest_type),
 ///                  signed_max_of_dest_type)) to dest_type)
 /// or:
-/// (truncate (smax ((smin (x, signed_max_of_dest_type)),
+/// (truncate (smax (smin (x, signed_max_of_dest_type),
 ///                  signed_min_of_dest_type)) to dest_type).
-/// With MatchPackUS, the smax/smin range is [0, unsigned_max_of_dest_type].
+///
 /// Return the source value to be truncated or SDValue() if the pattern was not
 /// matched.
 static SDValue detectSSatSPattern(SDValue In, EVT VT) {

>From ae14467bfb6b0942244fc422240de87e12db1724 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Wed, 7 Aug 2024 01:15:49 +0900
Subject: [PATCH 25/31] DAG: Restore matchMinMax to take an Opcode as an
 argument and handle the case of Min > 0

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 96 +++++++++----------
 1 file changed, 44 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0c5f1a7f0ea3c5..199226cbfe2a41 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14950,21 +14950,14 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
 }
 
 // Match min and return limit value as a parameter.
-static SDValue matchMin(SDValue V, APInt &Limit, APInt &C) {
-  if (V.getOpcode() == ISD::SMIN || V.getOpcode() == ISD::UMIN) {
-    if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit)
-      return V.getOperand(0);
-  }
-  return SDValue();
-}
-
-// Match max and return limit value as a parameter.
-static SDValue matchMax(SDValue V, APInt &Limit, APInt &C) {
-  if (V.getOpcode() == ISD::SMAX) {
+static SDValue matchMinMax(SDValue V, unsigned Opcode, APInt &Limit) {
+  if (V.getOpcode() == Opcode) {
+    APInt C;
     if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C)) {
-      if (Limit.isNegative() && C == Limit)
+      if (C == Limit)
         return V.getOperand(0);
-      else if (C.uge(Limit))
+      if (V.getOpcode() == ISD::SMAX && !Limit.isNegative() && C.uge(Limit) &&
+          Limit != 0)
         return V.getOperand(0);
     }
   }
@@ -14983,12 +14976,11 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
   // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt Max;
   APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
-  if (SDValue UMin = matchMin(In, UnsignedMax, Max)) {
+  if (SDValue Min = matchMinMax(In, ISD::UMIN, UnsignedMax)) {
     // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
     // the element size of the destination type.
-    return UMin;
+    return Min;
   }
 
   return SDValue();
@@ -15009,16 +15001,17 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
   // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt SignedMax, SignedMin, C;
+  APInt SignedMax, SignedMin;
+  SDValue Max, Min;
   SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
   SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
-  if (SDValue SMin = matchMin(In, SignedMax, C))
-    if (SDValue SMax = matchMax(SMin, SignedMin, C))
-      return SMax;
+  if (Min = matchMinMax(In, ISD::SMIN, SignedMax))
+    if (Max = matchMinMax(Min, ISD::SMAX, SignedMin))
+      return Max;
 
-  if (SDValue SMax = matchMax(In, SignedMin, C))
-    if (SDValue SMin = matchMin(SMax, SignedMax, C))
-      return SMin;
+  if (Max = matchMinMax(In, ISD::SMAX, SignedMin))
+    if (Min = matchMinMax(Max, ISD::SMIN, SignedMax))
+      return Min;
 
   return SDValue();
 }
@@ -15032,36 +15025,35 @@ static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
   // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt Min, Max, UnsignedMax, UnsignedMin;
-  SDValue SMax, SMin, UMin, First;
+  APInt UnsignedMax, Zero, One;
+  SDValue Max, Min;
   UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
-  UnsignedMin = APInt::getMinValue(NumDstBits).zext(NumSrcBits);
-  // (truncate (smin (smax (x, Min), Max)) to dest_type),
-  if (First = SMax = matchMax(In, UnsignedMin, Min))
-    SMin = matchMin(First, UnsignedMax, Max);
-  // (truncate (smax (smin (x, Max), Min)) to dest_type)
-  else if (First = SMin = matchMin(In, UnsignedMax, Max))
-    SMax = matchMax(First, UnsignedMin, Min);
-  // (truncate (umin (smax (x, Min), Max)) to dest_type)
-  else if (First = UMin = matchMin(In, UnsignedMax, Max))
-    SMax = matchMax(UMin, UnsignedMin, Min);
-
-  if (SMax) {
-    if (SMin) {
-      // Min == 0, Max is unsigned max of destination type.
-      if (Min.isZero())
-        return First.getOperand(0);
-      else {
-        // Min > 0, Max is unsigned max of destination type and Min <= Max.
-        if (First == SMin)
-          return SMin;
-        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
-      }
-    } else if (UMin) {
-      // Min == 0, Max is unsigned max of destination type.
-      if (Min.isZero())
-        return First.getOperand(0);
-    }
+  Zero = APInt::getZero(NumSrcBits);
+  One = APInt(NumSrcBits, 1);
+
+  // Min == 0, Max is unsigned max of destination type.
+  if (Max = matchMinMax(In, ISD::SMAX, Zero)) {
+    if (Min = matchMinMax(Max, ISD::SMIN, UnsignedMax))
+      return Max.getOperand(0);
+  }
+  // Min >= 1, Max is unsigned max of destination type and Min <= Max.
+  if (Max = matchMinMax(In, ISD::SMAX, One)) {
+    if (Min = matchMinMax(Max, ISD::SMIN, UnsignedMax))
+      return DAG.getNode(ISD::SMAX, DL, InVT, Min, In.getOperand(1));
+  }
+  // Min >= 0, Max is unsigned max of destination type.
+  if (Min = matchMinMax(In, ISD::SMIN, UnsignedMax)) {
+    if (Max = matchMinMax(Min, ISD::SMAX, Zero))
+      return Min.getOperand(0);
+    // Min >= 1
+    if (Max = matchMinMax(Min, ISD::SMAX, One))
+      return Min;
+  }
+  if (Min = matchMinMax(In, ISD::UMIN, UnsignedMax)) {
+    if (Max = matchMinMax(Min, ISD::SMAX, Zero))
+      return Min.getOperand(0);
+    if (Max = matchMinMax(Min, ISD::SMAX, One))
+      return Min;
   }
 
   return SDValue();

>From 76ea63c9ffe4010f12b59effeb18e0eeb1cffb1a Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Wed, 7 Aug 2024 01:19:13 +0900
Subject: [PATCH 26/31] DAG: Fix combining truncusat(fpto[su]i(x)) ->
 fpto[su]i_sat to only available for fptoui

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  3 +-
 llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 40 ++++++++-----------
 2 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 199226cbfe2a41..e0c630143edfcb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14925,8 +14925,7 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
 
   std::function<SDValue(SDValue)> MatchFPTOINT = [&](SDValue Val) -> SDValue {
-    if (Val.getOpcode() == ISD::FP_TO_SINT ||
-        Val.getOpcode() == ISD::FP_TO_UINT)
+    if (Val.getOpcode() == ISD::FP_TO_UINT)
       return Val;
     if (Val.getOpcode() == ISD::SMAX) {
       for (unsigned I = 0; I < Val.getNumOperands(); I++)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index ca629cee24d36d..9157bcba59e9bb 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -41,12 +41,8 @@ entry:
 define <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    sqxtun v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -238,8 +234,8 @@ entry:
 define <4 x i16> @ustest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    sqxtun v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
@@ -304,10 +300,10 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v0.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v0.4s
+; CHECK-CVT-NEXT:    sqxtun v0.4h, v1.4s
+; CHECK-CVT-NEXT:    sqxtun2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: ustest_f16i16:
@@ -683,12 +679,8 @@ entry:
 define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov d1, v0.d[1]
-; CHECK-NEXT:    fcvtzu w8, d0
-; CHECK-NEXT:    fcvtzu w9, d1
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    mov v0.s[1], w9
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    sqxtun v0.2s, v0.2d
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -860,8 +852,8 @@ entry:
 define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i16_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    sqxtun v0.4h, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
@@ -921,10 +913,10 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtzu v1.4s, v1.4s
-; CHECK-CVT-NEXT:    fcvtzu v2.4s, v0.4s
-; CHECK-CVT-NEXT:    uqxtn v0.4h, v1.4s
-; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v0.4s
+; CHECK-CVT-NEXT:    sqxtun v0.4h, v1.4s
+; CHECK-CVT-NEXT:    sqxtun2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: ustest_f16i16_mm:

>From d41fe3ebf40745701ed0ce2cd2991856f97c59a0 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Wed, 7 Aug 2024 21:32:55 +0900
Subject: [PATCH 27/31] DAG: fix order TRUNCATE_[US]SAT_U in switch-case

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e0c630143edfcb..acefee6efd71e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1909,8 +1909,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::ZERO_EXTEND_VECTOR_INREG:
   case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
   case ISD::TRUNCATE:           return visitTRUNCATE(N);
-  case ISD::TRUNCATE_USAT_U:
-  case ISD::TRUNCATE_SSAT_U:    return visitTRUNCATE_USAT(N);
+  case ISD::TRUNCATE_SSAT_U:
+  case ISD::TRUNCATE_USAT_U:    return visitTRUNCATE_USAT(N);
   case ISD::BITCAST:            return visitBITCAST(N);
   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   case ISD::FADD:               return visitFADD(N);

>From 3b5b5748887580e144bf53fd6ab218d81dfa6467 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Wed, 7 Aug 2024 21:33:18 +0900
Subject: [PATCH 28/31] DAG: remove handling when TRUNCATE_USAT_U has operand
 ISD:SMAX

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index acefee6efd71e8..3cf85cc7e55ae6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14927,11 +14927,6 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
   std::function<SDValue(SDValue)> MatchFPTOINT = [&](SDValue Val) -> SDValue {
     if (Val.getOpcode() == ISD::FP_TO_UINT)
       return Val;
-    if (Val.getOpcode() == ISD::SMAX) {
-      for (unsigned I = 0; I < Val.getNumOperands(); I++)
-        if (SDValue Matched = MatchFPTOINT(Val.getOperand(I)))
-          return Matched;
-    }
     return SDValue();
   };
 

>From f7e1ae32db1c23886a3938ed998e0aa78d992493 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 8 Aug 2024 03:33:22 +0900
Subject: [PATCH 29/31] DAG: remove handling `Min>0` case

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3cf85cc7e55ae6..32bc83c20d762c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14950,9 +14950,6 @@ static SDValue matchMinMax(SDValue V, unsigned Opcode, APInt &Limit) {
     if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C)) {
       if (C == Limit)
         return V.getOperand(0);
-      if (V.getOpcode() == ISD::SMAX && !Limit.isNegative() && C.uge(Limit) &&
-          Limit != 0)
-        return V.getOperand(0);
     }
   }
   return SDValue();
@@ -15030,24 +15027,14 @@ static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
     if (Min = matchMinMax(Max, ISD::SMIN, UnsignedMax))
       return Max.getOperand(0);
   }
-  // Min >= 1, Max is unsigned max of destination type and Min <= Max.
-  if (Max = matchMinMax(In, ISD::SMAX, One)) {
-    if (Min = matchMinMax(Max, ISD::SMIN, UnsignedMax))
-      return DAG.getNode(ISD::SMAX, DL, InVT, Min, In.getOperand(1));
-  }
   // Min >= 0, Max is unsigned max of destination type.
   if (Min = matchMinMax(In, ISD::SMIN, UnsignedMax)) {
     if (Max = matchMinMax(Min, ISD::SMAX, Zero))
       return Min.getOperand(0);
-    // Min >= 1
-    if (Max = matchMinMax(Min, ISD::SMAX, One))
-      return Min;
   }
   if (Min = matchMinMax(In, ISD::UMIN, UnsignedMax)) {
     if (Max = matchMinMax(Min, ISD::SMAX, Zero))
       return Min.getOperand(0);
-    if (Max = matchMinMax(Min, ISD::SMAX, One))
-      return Min;
   }
 
   return SDValue();

>From b00e7f44ad17fcd2214fecb67aa6d2b77e7ebca7 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Thu, 8 Aug 2024 22:42:38 +0900
Subject: [PATCH 30/31] DAG: Replace matchMinMax function with using sd_match

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 68 +++++++------------
 1 file changed, 25 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 32bc83c20d762c..f352854ffd7fbb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14943,18 +14943,6 @@ SDValue DAGCombiner::visitTRUNCATE_USAT(SDNode *N) {
                      DAG.getValueType(VT.getScalarType()));
 }
 
-// Match min and return limit value as a parameter.
-static SDValue matchMinMax(SDValue V, unsigned Opcode, APInt &Limit) {
-  if (V.getOpcode() == Opcode) {
-    APInt C;
-    if (ISD::isConstantSplatVector(V.getOperand(1).getNode(), C)) {
-      if (C == Limit)
-        return V.getOperand(0);
-    }
-  }
-  return SDValue();
-}
-
 /// Detect patterns of truncation with unsigned saturation:
 ///
 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
@@ -14967,12 +14955,10 @@ static SDValue detectUSatUPattern(SDValue In, EVT VT) {
   // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
+  SDValue Min;
   APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
-  if (SDValue Min = matchMinMax(In, ISD::UMIN, UnsignedMax)) {
-    // C2 should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
-    // the element size of the destination type.
+  if (sd_match(In, m_UMin(m_Value(Min), m_SpecificInt(UnsignedMax))))
     return Min;
-  }
 
   return SDValue();
 }
@@ -14992,17 +14978,17 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
   // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt SignedMax, SignedMin;
-  SDValue Max, Min;
-  SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
-  SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
-  if (Min = matchMinMax(In, ISD::SMIN, SignedMax))
-    if (Max = matchMinMax(Min, ISD::SMAX, SignedMin))
-      return Max;
+  SDValue Val;
+  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
 
-  if (Max = matchMinMax(In, ISD::SMAX, SignedMin))
-    if (Min = matchMinMax(Max, ISD::SMIN, SignedMax))
-      return Min;
+  if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(SignedMin)),
+                          m_SpecificInt(SignedMax))))
+    return Val;
+
+  if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(SignedMax)),
+                          m_SpecificInt(SignedMin))))
+    return Val;
 
   return SDValue();
 }
@@ -15010,32 +14996,28 @@ static SDValue detectSSatSPattern(SDValue In, EVT VT) {
 /// Detect patterns of truncation with unsigned saturation:
 static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                   const SDLoc &DL) {
-  EVT InVT = In.getValueType();
   unsigned NumDstBits = VT.getScalarSizeInBits();
   unsigned NumSrcBits = In.getScalarValueSizeInBits();
   // Saturation with truncation. We truncate from InVT to VT.
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
-  APInt UnsignedMax, Zero, One;
-  SDValue Max, Min;
+  SDValue Val;
+  APInt UnsignedMax, Zero;
   UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
   Zero = APInt::getZero(NumSrcBits);
-  One = APInt(NumSrcBits, 1);
 
   // Min == 0, Max is unsigned max of destination type.
-  if (Max = matchMinMax(In, ISD::SMAX, Zero)) {
-    if (Min = matchMinMax(Max, ISD::SMIN, UnsignedMax))
-      return Max.getOperand(0);
-  }
-  // Min >= 0, Max is unsigned max of destination type.
-  if (Min = matchMinMax(In, ISD::SMIN, UnsignedMax)) {
-    if (Max = matchMinMax(Min, ISD::SMAX, Zero))
-      return Min.getOperand(0);
-  }
-  if (Min = matchMinMax(In, ISD::UMIN, UnsignedMax)) {
-    if (Max = matchMinMax(Min, ISD::SMAX, Zero))
-      return Min.getOperand(0);
-  }
+  if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
+                          m_SpecificInt(Zero))))
+    return Val;
+
+  if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(Zero)),
+                          m_SpecificInt(UnsignedMax))))
+    return Val;
+
+  if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_SpecificInt(Zero)),
+                          m_SpecificInt(UnsignedMax))))
+    return Val;
 
   return SDValue();
 }

>From c8866398a2c9fa9e5691d70fd2d86df3de8c4408 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 9 Aug 2024 00:46:00 +0900
Subject: [PATCH 31/31] DAG: Replace pattern matching for Zero with using
 m_Zero()

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f352854ffd7fbb..6f667d7e8e13ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15002,20 +15002,17 @@ static SDValue detectSSatUPattern(SDValue In, EVT VT, SelectionDAG &DAG,
   assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
 
   SDValue Val;
-  APInt UnsignedMax, Zero;
-  UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
-  Zero = APInt::getZero(NumSrcBits);
-
+  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
   // Min == 0, Max is unsigned max of destination type.
   if (sd_match(In, m_SMax(m_SMin(m_Value(Val), m_SpecificInt(UnsignedMax)),
-                          m_SpecificInt(Zero))))
+                          m_Zero())))
     return Val;
 
-  if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_SpecificInt(Zero)),
+  if (sd_match(In, m_SMin(m_SMax(m_Value(Val), m_Zero()),
                           m_SpecificInt(UnsignedMax))))
     return Val;
 
-  if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_SpecificInt(Zero)),
+  if (sd_match(In, m_UMin(m_SMax(m_Value(Val), m_Zero()),
                           m_SpecificInt(UnsignedMax))))
     return Val;