[llvm] [ValueTracking][X86] Compute KnownBits for phadd/phsub (PR #92429)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 16 09:54:32 PDT 2024


llvmbot wrote:


@llvm/pr-subscribers-llvm-analysis

Author: None (mskamp)

Changes:

Add KnownBits computations for the x86 phadd/phsub intrinsics to ValueTracking and to X86 DAG lowering.
  
These instructions add/subtract adjacent vector elements of their
operands; for example, phadd [X1, X2], [Y1, Y2] = [X1 + X2, Y1 + Y2].
Because the KnownBits computed for a vector operand merge the
information of all of its (demanded) lanes, adding an operand's
KnownBits to themselves conservatively covers the sum of any two of its
elements. In the example, we can therefore compute the KnownBits of the
result by computing the KnownBits of [X1, X2] + [X1, X2] and of
[Y1, Y2] + [Y1, Y2] and intersecting the two, since every result lane
comes from one of the two operands. This approach generalizes to all
x86 vector types on which these instructions operate.
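
To make the lane-merging argument concrete, here is a minimal
standalone sketch (illustrative only, not part of the patch; the
function name is invented) that applies the same KnownBits calls the
patch uses to 8-bit elements known to lie in [0, 3]:

```cpp
// Sketch: lane-merged KnownBits for a hypothetical phadd whose 8-bit
// input elements are all known to be in [0, 3].
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

void phaddKnownBitsSketch() {
  // Bits [2, 8) of every element of both operands are known zero.
  KnownBits LHS(8), RHS(8);
  LHS.Zero.setBitsFrom(2);
  RHS.Zero.setBitsFrom(2);

  // Adding an operand's lane-merged KnownBits to themselves covers the
  // sum of any two of its elements; intersecting the two self-adds then
  // covers every result lane, whichever operand it comes from.
  KnownBits Known =
      KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false, /*NUW=*/false,
                                  LHS, LHS)
          .intersectWith(KnownBits::computeForAddSub(
              /*Add=*/true, /*NSW=*/false, /*NUW=*/false, RHS, RHS));

  // All pairwise sums lie in [0, 6], so bits [3, 8) of every result
  // element are known zero.
  assert(Known.countMinLeadingZeros() == 5);
}
```

This is the kind of bound the new tests below rely on: masking the
inputs to [0, 3] lets an `and` of the result with -8 fold to zero.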
    
The phadd.sw and phsub.sw variants perform signed saturating
addition/subtraction. For these, use KnownBits::sadd_sat and
KnownBits::ssub_sat in place of the plain add/sub computation.
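
Continuing the sketch above (again purely illustrative, reusing its
LHS/RHS setup), the saturating variants swap in the saturating helpers;
for the non-negative, bounded inputs used there, saturation cannot
trigger, so the bound is the same as for the plain add:

```cpp
// Sketch continued: the saturating counterpart of the computation above.
KnownBits KnownSat = KnownBits::sadd_sat(LHS, LHS)
                         .intersectWith(KnownBits::sadd_sat(RHS, RHS));
// Both operands are known non-negative and bounded by 3, so signed
// saturation is impossible and the result elements again lie in [0, 6].
```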
    
Fixes #82516.

---

Patch is 26.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92429.diff


4 Files Affected:

- (modified) llvm/lib/Analysis/ValueTracking.cpp (+48) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+73) 
- (added) llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll (+192) 
- (added) llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll (+201) 


``````````diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 2fdbb6e3ef840..e33cbef61e8f7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1725,6 +1725,54 @@ static void computeKnownBitsFromOperator(const Operator *I,
       case Intrinsic::x86_sse42_crc32_64_64:
         Known.Zero.setBitsFrom(32);
         break;
+      case Intrinsic::x86_ssse3_phadd_d:
+      case Intrinsic::x86_ssse3_phadd_w:
+      case Intrinsic::x86_ssse3_phadd_d_128:
+      case Intrinsic::x86_ssse3_phadd_w_128:
+      case Intrinsic::x86_avx2_phadd_d:
+      case Intrinsic::x86_avx2_phadd_w: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
+                    .intersectWith(KnownBits::computeForAddSub(
+                        true, false, false, Known2, Known2));
+        break;
+      }
+      case Intrinsic::x86_ssse3_phadd_sw:
+      case Intrinsic::x86_ssse3_phadd_sw_128:
+      case Intrinsic::x86_avx2_phadd_sw: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::sadd_sat(Known, Known)
+                    .intersectWith(KnownBits::sadd_sat(Known2, Known2));
+        break;
+      }
+      case Intrinsic::x86_ssse3_phsub_d:
+      case Intrinsic::x86_ssse3_phsub_w:
+      case Intrinsic::x86_ssse3_phsub_d_128:
+      case Intrinsic::x86_ssse3_phsub_w_128:
+      case Intrinsic::x86_avx2_phsub_d:
+      case Intrinsic::x86_avx2_phsub_w: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
+                    .intersectWith(KnownBits::computeForAddSub(
+                        false, false, false, Known2, Known2));
+        break;
+      }
+      case Intrinsic::x86_ssse3_phsub_sw:
+      case Intrinsic::x86_ssse3_phsub_sw_128:
+      case Intrinsic::x86_avx2_phsub_sw: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::ssub_sat(Known, Known)
+                    .intersectWith(KnownBits::ssub_sat(Known2, Known2));
+        break;
+      }
       case Intrinsic::riscv_vsetvli:
       case Intrinsic::riscv_vsetvlimax: {
         bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ecc5b3b3bf840..c23df2c91f385 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37262,6 +37262,27 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     }
     break;
   }
+  case X86ISD::HADD: {
+    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits Known2 =
+        DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+    Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
+                .intersectWith(KnownBits::computeForAddSub(true, false, false,
+                                                           Known2, Known2));
+    break;
+  }
+  case X86ISD::HSUB: {
+    Known =
+        DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits Known2 =
+        DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+    Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
+                .intersectWith(KnownBits::computeForAddSub(false, false, false,
+                                                           Known2, Known2));
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     switch (Op->getConstantOperandVal(0)) {
     case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37297,58 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
       computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
       break;
     }
+    case Intrinsic::x86_ssse3_phadd_d:
+    case Intrinsic::x86_ssse3_phadd_w:
+    case Intrinsic::x86_ssse3_phadd_d_128:
+    case Intrinsic::x86_ssse3_phadd_w_128:
+    case Intrinsic::x86_avx2_phadd_d:
+    case Intrinsic::x86_avx2_phadd_w: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
+                  .intersectWith(KnownBits::computeForAddSub(true, false, false,
+                                                             Known2, Known2));
+      break;
+    }
+    case Intrinsic::x86_ssse3_phadd_sw:
+    case Intrinsic::x86_ssse3_phadd_sw_128:
+    case Intrinsic::x86_avx2_phadd_sw: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::sadd_sat(Known, Known)
+                  .intersectWith(KnownBits::sadd_sat(Known2, Known2));
+      break;
+    }
+    case Intrinsic::x86_ssse3_phsub_d:
+    case Intrinsic::x86_ssse3_phsub_w:
+    case Intrinsic::x86_ssse3_phsub_d_128:
+    case Intrinsic::x86_ssse3_phsub_w_128:
+    case Intrinsic::x86_avx2_phsub_d:
+    case Intrinsic::x86_avx2_phsub_w: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
+                  .intersectWith(KnownBits::computeForAddSub(
+                      false, false, false, Known2, Known2));
+      break;
+    }
+    case Intrinsic::x86_ssse3_phsub_sw:
+    case Intrinsic::x86_ssse3_phsub_sw_128:
+    case Intrinsic::x86_avx2_phsub_sw: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::ssub_sat(Known, Known)
+                  .intersectWith(KnownBits::ssub_sat(Known2, Known2));
+      break;
+    }
     }
     break;
   }
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll b/llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll
new file mode 100644
index 0000000000000..443ab72ee54cb
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %0 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
+  %1 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
+  %2 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %0, <4 x i32> %1)
+  %3 = and <4 x i32> %2, <i32 -8, i32 -8, i32 -8, i32 -8>
+  %ret = icmp eq <4 x i32> %3, <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i1> %ret
+}
+
+define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
+; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
+;
+entry:
+  %0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %2 = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %0, <8 x i16> %1)
+  %3 = and <8 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
+  %ret = icmp eq <8 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
+  ret <8 x i1> %ret
+}
+
+define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
+; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
+;
+entry:
+  %0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %2 = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %0, <8 x i16> %1)
+  %3 = and <8 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
+  %ret = icmp eq <8 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
+  ret <8 x i1> %ret
+}
+
+define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
+; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %0 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %1 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %2 = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %0, <8 x i32> %1)
+  %3 = and <8 x i32> %2, <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
+  %ret = icmp eq <8 x i32> %3, <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
+  ret <8 x i1> %ret
+}
+
+define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
+; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
+; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
+;
+entry:
+  %0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %0, <16 x i16> %1)
+  %3 = and <16 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
+  %ret = icmp eq <16 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
+  ret <16 x i1> %ret
+}
+
+define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
+; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
+; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
+;
+entry:
+  %0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %0, <16 x i16> %1)
+  %3 = and <16 x i16> %2, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
+  %ret = icmp eq <16 x i16> %3, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
+  ret <16 x i1> %ret
+}
+
+define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %0 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %1 = or <4 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %2 = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %0, <4 x i32> %1)
+  %conv = trunc <4 x i32> %2 to <4 x i16>
+  %ret = icmp eq <4 x i16> %conv, <i16 3, i16 4, i16 5, i16 6>
+  ret <4 x i1> %ret
+}
+
+define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
+; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
+;
+entry:
+  %0 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %1 = or <8 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %2 = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %0, <8 x i16> %1)
+  %conv = trunc <8 x i16> %2 to <8 x i8>
+  %ret = icmp eq <8 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
+  ret <8 x i1> %ret
+}
+
+define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat(
+; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[X]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <8 x i16> [[Y]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <8 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    ret <8 x i1> [[TMP4]]
+;
+entry:
+  %0 = or <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %1 = or <8 x i16> %y, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %2 = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %0, <8 x i16> %1)
+  %3 = and <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %4 = icmp eq <8 x i16> %3, zeroinitializer
+  ret <8 x i1> %4
+}
+
+define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
+; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %0 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %1 = or <8 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+  %2 = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %0, <8 x i32> %1)
+  %conv = trunc <8 x i32> %2 to <8 x i16>
+  %ret = icmp eq <8 x i16> %conv, <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
+  ret <8 x i1> %ret
+}
+
+define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
+; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
+; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
+;
+entry:
+  %0 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %1 = or <16 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %0, <16 x i16> %1)
+  %conv = trunc <16 x i16> %2 to <16 x i8>
+  %ret = icmp eq <16 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
+  ret <16 x i1> %ret
+}
+
+define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
+; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat(
+; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = or <16 x i16> [[X]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i16> [[Y]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <16 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    ret <16 x i1> [[TMP4]]
+;
+entry:
+  %0 = or <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %1 = or <16 x i16> %y, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %0, <16 x i16> %1)
+  %3 = and <16 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %4 = icmp eq <16 x i16> %3, zeroinitializer
+  ret <16 x i1> %4
+}
diff --git a/llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll b/llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll
new file mode 100644
index 0000000000000..eba7b9843d991
--- /dev/null
+++ b/llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s
+
+define <4 x i16> @hadd_trunc_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: hadd_trunc_v4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
+  %1 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
+  %2 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %0, <4 x i32> %1)
+  %conv = trunc <4 x i32> %2 to <4 x i16>
+  ret <4 x i16> %conv
+}
+
+define <8 x i8> @hadd_trunc_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: hadd_trunc_v8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3]
+; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %2 = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %0, <8 x i16> %1)
+  %conv = trunc <8 x i16> %2 to <8 x i8>
+  ret <8 x i8> %conv
+}
+
+define <8 x i8> @hadd_trunc_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: hadd_trunc_v8i16_sat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3]
+; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %1 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %2 = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %0, <8 x i...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/92429


More information about the llvm-commits mailing list