[llvm] 58c01ef - [SelectionDAG] Merge FoldConstantVectorArithmetic into FoldConstantArithmetic (PR36544)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 9 03:31:16 PST 2021


Author: Simon Pilgrim
Date: 2021-11-09T11:31:01Z
New Revision: 58c01ef270dfb9de569133213fd6b6a7bcf8ded3

URL: https://github.com/llvm/llvm-project/commit/58c01ef270dfb9de569133213fd6b6a7bcf8ded3
DIFF: https://github.com/llvm/llvm-project/commit/58c01ef270dfb9de569133213fd6b6a7bcf8ded3.diff

LOG: [SelectionDAG] Merge FoldConstantVectorArithmetic into FoldConstantArithmetic (PR36544)

This patch merges FoldConstantVectorArithmetic back into FoldConstantArithmetic.

Like FoldConstantVectorArithmetic, we now handle vector ops with any operand count, but for scalar types we currently still only handle binops - this can be improved in future patches; in particular, some common unary/ternary ops still have poor constant folding.
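
For reference, both call sites updated in this commit now funnel into the single ArrayRef-based entry point; the pattern below is copied from the unary and ternary getNode() hunks in the diff further down:

    // From the unary getNode() hunk (e.g. ISD::CTPOP):
    SDValue Ops = {Operand};
    if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
      return Fold;

    // From the ternary getNode() hunk (a separate function scope):
    SDValue Ops[] = {N1, N2, N3};
    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops))
      return V;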

There's one change in functionality that causes test changes: FoldConstantVectorArithmetic bails early if the build/splat vectors aren't made up entirely of constant (or undef) elements, but FoldConstantArithmetic doesn't - it instead attempts to fold each lane's scalar nodes and bails only if they fail to regenerate a constant/undef result, allowing some additional identity/undef patterns to be handled.
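
To make that difference concrete, here is a small standalone model of the lane-wise behavior - an illustrative sketch only, not code from the patch: each lane is folded independently, and the vector as a whole folds only if every lane yields a known constant, so an identity such as (x & 0) -> 0 can succeed even when x itself is unknown.

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <vector>

    // Standalone model (not LLVM code) of lane-wise folding for a vector AND.
    // A lane holding std::nullopt stands in for a non-constant element.
    std::optional<std::vector<uint64_t>>
    foldAndLanes(const std::vector<std::optional<uint64_t>> &A,
                 const std::vector<std::optional<uint64_t>> &B) {
      assert(A.size() == B.size() && "operand vectors must match in length");
      std::vector<uint64_t> Out;
      for (size_t I = 0; I != A.size(); ++I) {
        if (A[I] && B[I])
          Out.push_back(*A[I] & *B[I]); // both lanes constant: fold directly
        else if ((A[I] && *A[I] == 0) || (B[I] && *B[I] == 0))
          Out.push_back(0);             // identity fold: x & 0 == 0
        else
          return std::nullopt;          // lane failed to regenerate a constant
      }
      return Out; // every lane folded, so the whole vector folds
    }

The old FoldConstantVectorArithmetic would have rejected such inputs up front as soon as any lane was non-constant; the merged function can reach the identity case, which is the kind of extra pattern behind the vselect-constants.ll changes below.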

Differential Revision: https://reviews.llvm.org/D113300

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/SelectionDAG.h
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/X86/vselect-constants.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 395900c168d70..5a3f4e9a23fff 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1732,9 +1732,6 @@ class SelectionDAG {
   SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
                                  ArrayRef<SDValue> Ops);
 
-  SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
-                                       ArrayRef<SDValue> Ops);
-
   /// Fold floating-point operations with 2 operands when both operands are
   /// constants and/or undefined.
   SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 72c39886c23c6..0d5513d9e39ef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4888,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::CTPOP: {
     SDValue Ops = {Operand};
-    if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+    if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
       return Fold;
   }
   }
@@ -5268,162 +5268,56 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
     return SDValue();
 
-  // TODO: For now, the array Ops should only contain two values.
-  // This enforcement will be removed once this function is merged with
-  // FoldConstantVectorArithmetic
-  if (Ops.size() != 2)
+  unsigned NumOps = Ops.size();
+  if (NumOps == 0)
     return SDValue();
 
   if (isUndef(Opcode, Ops))
     return getUNDEF(VT);
 
-  SDNode *N1 = Ops[0].getNode();
-  SDNode *N2 = Ops[1].getNode();
-
   // Handle the case of two scalars.
-  if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
-    if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
-      if (C1->isOpaque() || C2->isOpaque())
-        return SDValue();
-
-      Optional<APInt> FoldAttempt =
-          FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
-      if (!FoldAttempt)
-        return SDValue();
-
-      SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
-      assert((!Folded || !VT.isVector()) &&
-             "Can't fold vectors ops with scalar operands");
-      return Folded;
-    }
-  }
-
-  // fold (add Sym, c) -> Sym+c
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
-    return FoldSymbolOffset(Opcode, VT, GA, N2);
-  if (TLI->isCommutativeBinOp(Opcode))
-    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
-      return FoldSymbolOffset(Opcode, VT, GA, N1);
-
-  // If this is a bitwise logic opcode see if we can fold bitcasted ops.
-  // TODO: Can we generalize this and fold any bitcasted constant data?
-  if (ISD::isBitwiseLogicOp(Opcode) && N1->getOpcode() == ISD::BITCAST &&
-      N2->getOpcode() == ISD::BITCAST) {
-    SDValue InnerN1 = peekThroughBitcasts(N1->getOperand(0));
-    SDValue InnerN2 = peekThroughBitcasts(N2->getOperand(0));
-    EVT InnerVT = InnerN1.getValueType();
-    if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
-      if (SDValue C =
-              FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
-        return getBitcast(VT, C);
-  }
-
-  // For fixed width vectors, extract each constant element and fold them
-  // individually. Either input may be an undef value.
-  bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
-                   N1->getOpcode() == ISD::SPLAT_VECTOR;
-  if (!IsBVOrSV1 && !N1->isUndef())
-    return SDValue();
-  bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
-                   N2->getOpcode() == ISD::SPLAT_VECTOR;
-  if (!IsBVOrSV2 && !N2->isUndef())
-    return SDValue();
-  // If both operands are undef, that's handled the same way as scalars.
-  if (!IsBVOrSV1 && !IsBVOrSV2)
-    return SDValue();
-
-  EVT SVT = VT.getScalarType();
-  EVT LegalSVT = SVT;
-  if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
-    LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
-    if (LegalSVT.bitsLT(SVT))
-      return SDValue();
-  }
-
-  SmallVector<SDValue, 4> Outputs;
-  unsigned NumElts = 0;
-  if (IsBVOrSV1)
-    NumElts = std::max(NumElts, N1->getNumOperands());
-  if (IsBVOrSV2)
-    NumElts = std::max(NumElts, N2->getNumOperands());
-  assert(NumElts != 0 && "Expected non-zero operands");
-  // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
-  // one iteration for that.
-  assert((!VT.isScalableVector() || NumElts == 1) &&
-         "Scalable vector should only have one scalar");
-
-  for (unsigned I = 0; I != NumElts; ++I) {
-    // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
-    // to use operand 0 of the SPLAT_VECTOR for each fixed element.
-    SDValue V1;
-    if (N1->getOpcode() == ISD::BUILD_VECTOR)
-      V1 = N1->getOperand(I);
-    else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
-      V1 = N1->getOperand(0);
-    else
-      V1 = getUNDEF(SVT);
-
-    SDValue V2;
-    if (N2->getOpcode() == ISD::BUILD_VECTOR)
-      V2 = N2->getOperand(I);
-    else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
-      V2 = N2->getOperand(0);
-    else
-      V2 = getUNDEF(SVT);
-
-    if (SVT.isInteger()) {
-      if (V1.getValueType().bitsGT(SVT))
-        V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
-      if (V2.getValueType().bitsGT(SVT))
-        V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+  if (NumOps == 2) {
+    // TODO: Move foldConstantFPMath here?
+
+    if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+      if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+        if (C1->isOpaque() || C2->isOpaque())
+          return SDValue();
+
+        Optional<APInt> FoldAttempt =
+            FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+        if (!FoldAttempt)
+          return SDValue();
+
+        SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+        assert((!Folded || !VT.isVector()) &&
+               "Can't fold vectors ops with scalar operands");
+        return Folded;
+      }
     }
 
-    if (V1.getValueType() != SVT || V2.getValueType() != SVT)
-      return SDValue();
+    // fold (add Sym, c) -> Sym+c
+    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+      return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+    if (TLI->isCommutativeBinOp(Opcode))
+      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+        return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
 
-    // Fold one vector element.
-    SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
-    if (LegalSVT != SVT)
-      ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
-
-    // Scalar folding only succeeded if the result is a constant or UNDEF.
-    if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
-        ScalarResult.getOpcode() != ISD::ConstantFP)
-      return SDValue();
-    Outputs.push_back(ScalarResult);
-  }
-
-  if (N1->getOpcode() == ISD::BUILD_VECTOR ||
-      N2->getOpcode() == ISD::BUILD_VECTOR) {
-    assert(VT.getVectorNumElements() == Outputs.size() &&
-           "Vector size mismatch!");
-
-    // Build a big vector out of the scalar elements we generated.
-    return getBuildVector(VT, SDLoc(), Outputs);
+    // If this is a bitwise logic opcode see if we can fold bitcasted ops.
+    // TODO: Can we generalize this and fold any bitcasted constant data?
+    if (ISD::isBitwiseLogicOp(Opcode) && Ops[0].getOpcode() == ISD::BITCAST &&
+        Ops[1].getOpcode() == ISD::BITCAST) {
+      SDValue InnerN1 = peekThroughBitcasts(Ops[0].getOperand(0));
+      SDValue InnerN2 = peekThroughBitcasts(Ops[1].getOperand(0));
+      EVT InnerVT = InnerN1.getValueType();
+      if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
+        if (SDValue C =
+                FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
+          return getBitcast(VT, C);
+    }
   }
 
-  assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
-          N2->getOpcode() == ISD::SPLAT_VECTOR) &&
-         "One operand should be a splat vector");
-
-  assert(Outputs.size() == 1 && "Vector size mismatch!");
-  return getSplatVector(VT, SDLoc(), Outputs[0]);
-}
-
-// TODO: Merge with FoldConstantArithmetic
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
-                                                   const SDLoc &DL, EVT VT,
-                                                   ArrayRef<SDValue> Ops) {
-  // If the opcode is a target-specific ISD node, there's nothing we can
-  // do here and the operand rules may not line up with the below, so
-  // bail early.
-  if (Opcode >= ISD::BUILTIN_OP_END)
-    return SDValue();
-
-  if (isUndef(Opcode, Ops))
-    return getUNDEF(VT);
-
-  // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+  // This is for vector folding only from here on.
   if (!VT.isVector())
     return SDValue();
 
@@ -5434,19 +5328,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
            Op.getValueType().getVectorElementCount() == NumElts;
   };
 
-  auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
-    APInt SplatVal;
-    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
     return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
-           (BV && BV->isConstant()) ||
-           (Op.getOpcode() == ISD::SPLAT_VECTOR &&
-            ISD::isConstantSplatVector(Op.getNode(), SplatVal));
+           Op.getOpcode() == ISD::BUILD_VECTOR ||
+           Op.getOpcode() == ISD::SPLAT_VECTOR;
   };
 
   // All operands must be vector types with the same number of elements as
-  // the result type and must be either UNDEF or a build vector of constant
+  // the result type and must be either UNDEF or a build/splat vector
   // or UNDEF scalars.
-  if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
+  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
       !llvm::all_of(Ops, IsScalarOrSameVectorSize))
     return SDValue();
 
@@ -5466,17 +5357,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
   // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
   // only have one operand to check. For fixed-length vector types we may have
   // a combination of BUILD_VECTOR and SPLAT_VECTOR.
-  unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+  unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
 
   // Constant fold each scalar lane separately.
   SmallVector<SDValue, 4> ScalarResults;
-  for (unsigned I = 0; I != NumOperands; I++) {
+  for (unsigned I = 0; I != NumVectorElts; I++) {
     SmallVector<SDValue, 4> ScalarOps;
     for (SDValue Op : Ops) {
       EVT InSVT = Op.getValueType().getScalarType();
       if (Op.getOpcode() != ISD::BUILD_VECTOR &&
           Op.getOpcode() != ISD::SPLAT_VECTOR) {
-        // We've checked that this is UNDEF or a constant of some kind.
         if (Op.isUndef())
           ScalarOps.push_back(getUNDEF(InSVT));
         else
@@ -6160,7 +6050,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return V;
     // Vector constant folding.
     SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+    if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
       NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
       return V;
     }

diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index dbb8b31759d6a..786ca0bd2cc19 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -569,27 +569,27 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV32MV-NEXT:    addi a1, sp, 8
 ; RV32MV-NEXT:    vle16.v v8, (a1)
-; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT:    vle16.v v9, (a1)
-; RV32MV-NEXT:    vid.v v10
-; RV32MV-NEXT:    vsub.vv v8, v8, v10
-; RV32MV-NEXT:    vmul.vv v8, v8, v9
-; RV32MV-NEXT:    vadd.vv v9, v8, v8
+; RV32MV-NEXT:    vmv.v.i v9, 10
 ; RV32MV-NEXT:    addi a1, zero, 9
-; RV32MV-NEXT:    vmv.v.i v10, 10
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV32MV-NEXT:    vmv.s.x v10, a1
+; RV32MV-NEXT:    vmv.s.x v9, a1
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32MV-NEXT:    vsll.vv v9, v9, v10
-; RV32MV-NEXT:    addi a1, zero, 2047
-; RV32MV-NEXT:    vand.vx v8, v8, a1
+; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
+; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
+; RV32MV-NEXT:    vle16.v v10, (a1)
+; RV32MV-NEXT:    vid.v v11
+; RV32MV-NEXT:    vsub.vv v8, v8, v11
+; RV32MV-NEXT:    vmul.vv v8, v8, v10
+; RV32MV-NEXT:    vadd.vv v10, v8, v8
+; RV32MV-NEXT:    vsll.vv v9, v10, v9
 ; RV32MV-NEXT:    vmv.v.i v10, 0
-; RV32MV-NEXT:    addi a2, zero, 1
+; RV32MV-NEXT:    addi a1, zero, 1
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
 ; RV32MV-NEXT:    vmv1r.v v11, v10
-; RV32MV-NEXT:    vmv.s.x v11, a2
+; RV32MV-NEXT:    vmv.s.x v11, a1
+; RV32MV-NEXT:    addi a1, zero, 2047
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV32MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
 ; RV32MV-NEXT:    vle16.v v12, (a2)
@@ -634,27 +634,27 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; RV64MV-NEXT:    addi a1, sp, 8
 ; RV64MV-NEXT:    vle16.v v8, (a1)
-; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT:    vle16.v v9, (a1)
-; RV64MV-NEXT:    vid.v v10
-; RV64MV-NEXT:    vsub.vv v8, v8, v10
-; RV64MV-NEXT:    vmul.vv v8, v8, v9
-; RV64MV-NEXT:    vadd.vv v9, v8, v8
+; RV64MV-NEXT:    vmv.v.i v9, 10
 ; RV64MV-NEXT:    addi a1, zero, 9
-; RV64MV-NEXT:    vmv.v.i v10, 10
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
-; RV64MV-NEXT:    vmv.s.x v10, a1
+; RV64MV-NEXT:    vmv.s.x v9, a1
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64MV-NEXT:    vsll.vv v9, v9, v10
-; RV64MV-NEXT:    addi a1, zero, 2047
-; RV64MV-NEXT:    vand.vx v8, v8, a1
+; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
+; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
+; RV64MV-NEXT:    vle16.v v10, (a1)
+; RV64MV-NEXT:    vid.v v11
+; RV64MV-NEXT:    vsub.vv v8, v8, v11
+; RV64MV-NEXT:    vmul.vv v8, v8, v10
+; RV64MV-NEXT:    vadd.vv v10, v8, v8
+; RV64MV-NEXT:    vsll.vv v9, v10, v9
 ; RV64MV-NEXT:    vmv.v.i v10, 0
-; RV64MV-NEXT:    addi a2, zero, 1
+; RV64MV-NEXT:    addi a1, zero, 1
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
 ; RV64MV-NEXT:    vmv1r.v v11, v10
-; RV64MV-NEXT:    vmv.s.x v11, a2
+; RV64MV-NEXT:    vmv.s.x v11, a1
+; RV64MV-NEXT:    addi a1, zero, 2047
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    lui a2, %hi(.LCPI4_1)
 ; RV64MV-NEXT:    addi a2, a2, %lo(.LCPI4_1)
 ; RV64MV-NEXT:    vle16.v v12, (a2)

diff --git a/llvm/test/CodeGen/X86/vselect-constants.ll b/llvm/test/CodeGen/X86/vselect-constants.ll
index e0f0ce6115d88..9422b836d39dd 100644
--- a/llvm/test/CodeGen/X86/vselect-constants.ll
+++ b/llvm/test/CodeGen/X86/vselect-constants.ll
@@ -260,34 +260,12 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
 define <2 x i37> @ossfuzz21167(<2 x i37> %x, <2 x i37> %y) {
 ; SSE-LABEL: ossfuzz21167:
 ; SSE:       # %bb.0: # %BB
-; SSE-NEXT:    psllq $27, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    psrad $27, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; SSE-NEXT:    psrlq $27, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
-; SSE-NEXT:    pxor %xmm0, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
-; SSE-NEXT:    por %xmm1, %xmm0
-; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ossfuzz21167:
 ; AVX:       # %bb.0: # %BB
-; AVX-NEXT:    vpsllq $27, %xmm1, %xmm0
-; AVX-NEXT:    vpsrad $27, %xmm0, %xmm1
-; AVX-NEXT:    vpsrlq $27, %xmm0, %xmm0
-; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 BB:
   %c0 = icmp sgt <2 x i37> %y, zeroinitializer


        

