[llvm] r340128 - [X86] Add support for using 512-bit PSUBUS to combineSelect.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 18 11:51:03 PDT 2018


Author: ctopper
Date: Sat Aug 18 11:51:03 2018
New Revision: 340128

URL: http://llvm.org/viewvc/llvm-project?rev=340128&view=rev
Log:
[X86] Add support for using 512-bit PSUBUS to combineSelect.

The code already supports 128-bit and 256-bit vectors and even knows to split 256-bit vectors for AVX1. So we really just needed to stop looking for specific VTs and subtarget features and just look for legal VTs with i8/i16 elements.

While there, add some curly braces around outer if statement bodies that contain only another if. It makes all the closing curly braces look more regular.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/psubus.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=340128&r1=340127&r2=340128&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Aug 18 11:51:03 2018
@@ -33029,9 +33029,10 @@ static SDValue combineSelect(SDNode *N,
 
   // Match VSELECTs into subs with unsigned saturation.
   if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
-      // psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
-      ((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
-       (Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+      // psubus is available in SSE2 for i8 and i16 vectors.
+      Subtarget.hasSSE2() &&
+      (VT.getVectorElementType() == MVT::i8 ||
+       VT.getVectorElementType() == MVT::i16)) {
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
 
     // Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -33062,7 +33063,7 @@ static SDValue combineSelect(SDNode *N,
         return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
                                 SUBUSBuilder);
 
-      if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
+      if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
         if (isa<BuildVectorSDNode>(CondRHS)) {
           // If the RHS is a constant we have to reverse the const
           // canonicalization.
@@ -33083,7 +33084,7 @@ static SDValue combineSelect(SDNode *N,
           // FIXME: Would it be better to use computeKnownBits to determine
           //        whether it's safe to decanonicalize the xor?
           // x s< 0 ? x^C : 0 --> subus x, C
-          if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode())
+          if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
             if (CC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
                 ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
                 OpRHSConst->getAPIntValue().isSignMask()) {
@@ -33093,7 +33094,9 @@ static SDValue combineSelect(SDNode *N,
               return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
                                       SUBUSBuilder);
             }
+          }
         }
+      }
     }
   }
 

Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=340128&r1=340127&r2=340128&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sat Aug 18 11:51:03 2018
@@ -1186,8 +1186,7 @@ define <64 x i8> @test17(<64 x i8> %x, i
 ; AVX512-LABEL: test17:
 ; AVX512:       # %bb.0: # %vector.ph
 ; AVX512-NEXT:    vpbroadcastb %edi, %zmm1
-; AVX512-NEXT:    vpcmpnltub %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpsubb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpsubusb %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 vector.ph:
   %0 = insertelement <64 x i8> undef, i8 %w, i32 0
@@ -1236,8 +1235,7 @@ define <32 x i16> @test18(<32 x i16> %x,
 ; AVX512-LABEL: test18:
 ; AVX512:       # %bb.0: # %vector.ph
 ; AVX512-NEXT:    vpbroadcastw %edi, %zmm1
-; AVX512-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k1
-; AVX512-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 vector.ph:
   %0 = insertelement <32 x i16> undef, i16 %w, i32 0




More information about the llvm-commits mailing list