[llvm] 08212db - [X86] Add xop/avx2 shifts to X86TargetLowering::isBinOp

Thu Jan 13 09:44:31 PST 2022

Author: Simon Pilgrim
Date: 2022-01-13T17:44:10Z
New Revision: 08212dbc449bbf10652aa735791280266d473076

URL: https://github.com/llvm/llvm-project/commit/08212dbc449bbf10652aa735791280266d473076
DIFF: https://github.com/llvm/llvm-project/commit/08212dbc449bbf10652aa735791280266d473076.diff

LOG: [X86] Add xop/avx2 shifts to X86TargetLowering::isBinOp

Allows shuffle combining through per-element shift nodes

This exposed a number of issues with shuffle combining with target intrinsics that are lowered to nodes later during legalization - in particular shuffle combining and SimplifyDemandedVectorElts were being called after canonicalizeShuffleWithBinOps, meaning that shuffles didn't have a chance to be combined away before the shuffle(binop(x,y)) -> binop(shuffle(x),shuffle(y)) fold.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
    llvm/test/CodeGen/X86/xop-shifts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8f84accce590..474ff4e8bd98 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33081,6 +33081,11 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const {
   case X86ISD::FMAX:
   case X86ISD::FMIN:
   case X86ISD::FANDN:
+  case X86ISD::VPSHA:
+  case X86ISD::VPSHL:
+  case X86ISD::VSHLV:
+  case X86ISD::VSRLV:
+  case X86ISD::VSRAV:
     return true;
   }
 
@@ -38931,9 +38936,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
   if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
     return R;
 
-  if (SDValue R = canonicalizeShuffleWithBinOps(N, DAG, DL))
-    return R;
-
   // Handle specific target shuffles.
   switch (Opcode) {
   case X86ISD::MOVDDUP: {
@@ -39898,6 +39900,12 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
     if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                        DCI))
       return SDValue(N, 0);
+
+    // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
+    // Perform this after other shuffle combines to allow inner shuffles to be
+    // combined away first.
+    if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, SDLoc(N)))
+      return BinOp;
   }
 
   return SDValue();

diff  --git a/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll b/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
index 8562f80a6e91..514e8d1fdd58 100644
--- a/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll
@@ -113,7 +113,7 @@ define <4 x i32> @demandedelts_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
 define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-LABEL: demandedelts_vpsrlvq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
 ; CHECK-NEXT:    retq
   %shuffle = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -123,16 +123,13 @@ define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
 }
 
 ;
-; isBinOp Handling (TODO)
+; isBinOp Handling
 ;
 
 define <4 x i32> @binop_shuffle_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: binop_shuffle_vpsllvd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
 ; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
 ; CHECK-NEXT:    retq
   %shuffle0 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %shuffle1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -144,10 +141,7 @@ define <4 x i32> @binop_shuffle_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
 define <8 x i32> @binop_shuffle_vpsravd(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: binop_shuffle_vpsravd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
-; CHECK-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
 ; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
 ; CHECK-NEXT:    retq
   %shuffle0 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   %shuffle1 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
@@ -159,10 +153,7 @@ define <8 x i32> @binop_shuffle_vpsravd(<8 x i32> %a0, <8 x i32> %a1) {
 define <4 x i64> @binop_shuffle_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-LABEL: binop_shuffle_vpsrlvq:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
-; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,2,1,0]
 ; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
-; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
 ; CHECK-NEXT:    retq
   %shuffle0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   %shuffle1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

diff  --git a/llvm/test/CodeGen/X86/xop-shifts.ll b/llvm/test/CodeGen/X86/xop-shifts.ll
index 3916fc7d6b12..83dcf9ce0d1e 100644
--- a/llvm/test/CodeGen/X86/xop-shifts.ll
+++ b/llvm/test/CodeGen/X86/xop-shifts.ll
@@ -31,16 +31,13 @@ define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
 }
 
 ;
-; isBinOp Handling (TODO)
+; isBinOp Handling
 ;
 
 define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: binop_shuffle_vpshaw:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; CHECK-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; CHECK-NEXT:    retq
   %shuffle0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   %shuffle1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -49,6 +46,7 @@ define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
   ret <8 x i16> %result
 }
 
+; TODO - canonicalizeShuffleWithBinOps - handle scaled shuffle masks.
 define <2 x i64> @binop_shuffle_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: binop_shuffle_vpshlq:
 ; CHECK:       # %bb.0: