[llvm] r334007 - [X86][SSE] Add target shuffle support to X86TargetLowering::computeKnownBitsForTargetNode

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 5 03:52:30 PDT 2018


Author: rksimon
Date: Tue Jun  5 03:52:29 2018
New Revision: 334007

URL: http://llvm.org/viewvc/llvm-project?rev=334007&view=rev
Log:
[X86][SSE] Add target shuffle support to X86TargetLowering::computeKnownBitsForTargetNode

Ideally we'd use resolveTargetShuffleInputs to handle faux shuffles as well but:
(a) that code path doesn't handle general/pre-legalized ops/types very well.
(b) I'm concerned about the compute time as they recurse to calls to computeKnownBits/ComputeNumSignBits which would need depth limiting somehow.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/pr35918.ll
    llvm/trunk/test/CodeGen/X86/psubus.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=334007&r1=334006&r2=334007&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jun  5 03:52:29 2018
@@ -28795,6 +28795,57 @@ void X86TargetLowering::computeKnownBits
     Known.Zero.setBitsFrom(8);
     break;
   }
+
+  // Handle target shuffles.
+  // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
+  if (isTargetShuffle(Opc)) {
+    bool IsUnary;
+    SmallVector<int, 64> Mask;
+    SmallVector<SDValue, 2> Ops;
+    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask,
+                             IsUnary)) {
+      unsigned NumOps = Ops.size();
+      unsigned NumElts = VT.getVectorNumElements();
+      if (Mask.size() == NumElts) {
+        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
+        Known.Zero.setAllBits(); Known.One.setAllBits();
+        for (unsigned i = 0; i != NumElts; ++i) {
+          if (!DemandedElts[i])
+            continue;
+          int M = Mask[i];
+          if (M == SM_SentinelUndef) {
+            // For UNDEF elements, we don't know anything about the common state
+            // of the shuffle result.
+            Known.resetAll();
+            break;
+          } else if (M == SM_SentinelZero) {
+            Known.One.clearAllBits();
+            continue;
+          }
+          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
+                 "Shuffle index out of range");
+
+          unsigned OpIdx = (unsigned)M / NumElts;
+          unsigned EltIdx = (unsigned)M % NumElts;
+          if (Ops[OpIdx].getValueType() != VT) {
+            // TODO - handle target shuffle ops with different value types.
+            Known.resetAll();
+            break;
+          }
+          DemandedOps[OpIdx].setBit(EltIdx);
+        }
+        // Known bits are the values that are shared by every demanded element.
+        for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
+          if (!DemandedOps[i])
+            continue;
+          KnownBits Known2;
+          DAG.computeKnownBits(Ops[i], Known2, DemandedOps[i], Depth + 1);
+          Known.One &= Known2.One;
+          Known.Zero &= Known2.Zero;
+        }
+      }
+    }
+  }
 }
 
 unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(

Modified: llvm/trunk/test/CodeGen/X86/pr35918.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr35918.ll?rev=334007&r1=334006&r2=334007&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr35918.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr35918.ll Tue Jun  5 03:52:29 2018
@@ -59,8 +59,7 @@ define void @fetch_r16g16_snorm_unorm8(<
 ; X64-SKYLAKE-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-SKYLAKE-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
 ; X64-SKYLAKE-NEXT:    vpsrld $7, %xmm0, %xmm0
-; X64-SKYLAKE-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
-; X64-SKYLAKE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2],zero,zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
+; X64-SKYLAKE-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4],zero,zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
 ; X64-SKYLAKE-NEXT:    vmovd %xmm0, %eax
 ; X64-SKYLAKE-NEXT:    orl $-16777216, %eax # imm = 0xFF000000
 ; X64-SKYLAKE-NEXT:    movl %eax, (%rdi)

Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=334007&r1=334006&r2=334007&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Tue Jun  5 03:52:29 2018
@@ -1620,15 +1620,10 @@ define <8 x i16> @psubus_8i64_max(<8 x i
 ; SSE41-NEXT:    packusdw %xmm3, %xmm7
 ; SSE41-NEXT:    packusdw %xmm4, %xmm7
 ; SSE41-NEXT:    psubusw %xmm7, %xmm10
-; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm10[0],zero,zero,zero,xmm10[1],zero,zero,zero
-; SSE41-NEXT:    movdqa %xmm10, %xmm1
-; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[12,13],zero,zero,zero,zero,zero,zero,xmm1[14,15],zero,zero,zero,zero,zero,zero
-; SSE41-NEXT:    movdqa %xmm10, %xmm2
-; SSE41-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[8,9],zero,zero,zero,zero,zero,zero,xmm2[10,11],zero,zero,zero,zero,zero,zero
-; SSE41-NEXT:    packusdw %xmm1, %xmm2
-; SSE41-NEXT:    pshufb {{.*#+}} xmm10 = xmm10[4,5],zero,zero,zero,zero,zero,zero,xmm10[6,7],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm1[4],xmm10[5],xmm1[5],xmm10[6],xmm1[6],xmm10[7],xmm1[7]
 ; SSE41-NEXT:    packusdw %xmm10, %xmm0
-; SSE41-NEXT:    packusdw %xmm2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: psubus_8i64_max:
@@ -1656,13 +1651,10 @@ define <8 x i16> @psubus_8i64_max(<8 x i
 ; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm0[12,13],zero,zero,zero,zero,zero,zero,xmm0[14,15],zero,zero,zero,zero,zero,zero
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm0[8,9],zero,zero,zero,zero,zero,zero,xmm0[10,11],zero,zero,zero,zero,zero,zero
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5],zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
-; AVX1-NEXT:    vpackusdw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;




More information about the llvm-commits mailing list