[llvm] [X86] Combine VPERMV3 to VPERMV for i8/i16 (PR #96414)
    via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Sun Jun 23 20:47:41 PDT 2024
    
    
  
================
@@ -41273,6 +41273,33 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
 
     return SDValue();
   }
+  case X86ISD::VPERMV3: {
+    // VPERM[I,T]2[B,W] are 3 uops on Skylake and Icelake so we try to use
+    // VPERMV.
+    if (VT.is512BitVector() || (VT.is256BitVector() && !Subtarget.hasEVEX512()))
+      return SDValue();
+    MVT ElementVT = VT.getVectorElementType();
+    if (ElementVT != MVT::i8 && ElementVT != MVT::i16)
+      return SDValue();
+    SDValue V1 = N.getOperand(0);
+    SDValue V2 = N.getOperand(2);
+    if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        V1.getConstantOperandVal(1) == 0 &&
+        V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        V2.getConstantOperandVal(1) == VT.getVectorNumElements() &&
----------------
goldsteinn wrote:
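Should we also handle the inverse case, where `V2` is the low-half extract (`V2.getConstantOperandVal(1) == 0`) and `V1` is the high-half extract (`V1.getConstantOperandVal(1) == VT.getVectorNumElements()`)?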
https://github.com/llvm/llvm-project/pull/96414
    
    
More information about the llvm-commits mailing list