[llvm] r218176 - [x86] Refactor the code for emitting INSERTPS to reuse the zeroable mask

Chandler Carruth chandlerc at gmail.com
Fri Sep 19 20:57:01 PDT 2014


Author: chandlerc
Date: Fri Sep 19 22:57:01 2014
New Revision: 218176

URL: http://llvm.org/viewvc/llvm-project?rev=218176&view=rev
Log:
[x86] Refactor the code for emitting INSERTPS to reuse the zeroable mask
analysis used elsewhere. This removes the last duplicate of this logic.
Also simplify the code here quite a bit. No functionality changed.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=218176&r1=218175&r2=218176&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 19 22:57:01 2014
@@ -7802,35 +7802,25 @@ static SDValue lowerV4F32VectorShuffle(S
       // When using INSERTPS we can zero any lane of the destination. Collect
       // the zero inputs into a mask and drop them from the lanes of V1 which
       // actually need to be present as inputs to the INSERTPS.
-      unsigned ZMask = 0;
-      if (ISD::isBuildVectorAllZeros(V1.getNode())) {
-        ZMask = 0xF ^ (1 << V2Index);
-      } else if (V1.getOpcode() == ISD::BUILD_VECTOR) {
-        for (int i = 0; i < 4; ++i) {
-          int M = Mask[i];
-          if (M >= 4)
-            continue;
-          if (M > -1) {
-            SDValue Input = V1.getOperand(M);
-            if (Input.getOpcode() != ISD::UNDEF &&
-                !X86::isZeroNode(Input)) {
-              // A non-zero input!
-              ZMask = 0;
-              break;
-            }
-          }
-          ZMask |= 1 << i;
-        }
-      }
+      SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
 
       // Synthesize a shuffle mask for the non-zero and non-v2 inputs.
-      int InsertShuffleMask[4] = {-1, -1, -1, -1};
+      bool InsertNeedsShuffle = false;
+      unsigned ZMask = 0;
       for (int i = 0; i < 4; ++i)
-        if (i != V2Index && (ZMask & (1 << i)) == 0)
-          InsertShuffleMask[i] = Mask[i];
+        if (i != V2Index) {
+          if (Zeroable[i]) {
+            ZMask |= 1 << i;
+          } else if (Mask[i] != i) {
+            InsertNeedsShuffle = true;
+            break;
+          }
+        }
 
-      if (isNoopShuffleMask(InsertShuffleMask)) {
-        // Replace V1 with undef if nothing from V1 survives the INSERTPS.
+      // We don't want to use INSERTPS or other insertion techniques if it will
+      // require shuffling anyways.
+      if (!InsertNeedsShuffle) {
+        // If all of V1 is zeroable, replace it with undef.
         if ((ZMask | 1 << V2Index) == 0xF)
           V1 = DAG.getUNDEF(MVT::v4f32);
 





More information about the llvm-commits mailing list