[llvm] r197492 - Fix for PR18045:

Tue Dec 17 04:07:34 PST 2013

Author: dyatkovskiy
Date: Tue Dec 17 06:07:33 2013
New Revision: 197492

URL: http://llvm.org/viewvc/llvm-project?rev=197492&view=rev
Log:
Fix for PR18045:
http://llvm.org/bugs/show_bug.cgi?id=18045

Short issue description:
For X86 machines with sse < sse4.1 we got failures for some
particular load/store vector sequences:

$ clang-trunk -m32 -O2 test-case.c
fatal error: error in backend: Cannot select: 0x4200920: v4i32,ch = load 0x41d6ab0, 0x4205850,
      0x41dcb10<LD16[getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)](align=4)> [ORD=82]
      [ID=58]
  0x4205850: i32 = X86ISD::Wrapper 0x41d5490 [ORD=26] [ID=43]
    0x41d5490: i32 = TargetGlobalAddress<[4 x i32]* @e> 0 [ORD=26] [ID=23]
  0x41dcb10: i32 = undef [ID=2]

The reason is that EltsFromConsecutiveLoads could emit such load instruction
both before and after legalize stage. Though this instruction is not legal for
machines with SSSE3 and lower.

The fix: In EltsFromConsecutiveLoads, if we have passed legalize stage, we
check whether nodes it emits are legal. 

P.S.: If you get failure in time from 12:00 and till 22:00 (UTC-8),
perhaps I'll slow with response, so you better reject this commit. Thanks!


Added:
    llvm/trunk/test/CodeGen/X86/v4i32load-crash.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=197492&r1=197491&r2=197492&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 17 06:07:33 2013
@@ -5426,7 +5426,8 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MV
 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
 /// There's even a handy isZeroNode for that purpose.
 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
-                                        SDLoc &DL, SelectionDAG &DAG) {
+                                        SDLoc &DL, SelectionDAG &DAG,
+                                        bool isAfterLegalize) {
   EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = Elts.size();
 
@@ -5462,7 +5463,13 @@ static SDValue EltsFromConsecutiveLoads(
   // load of the entire vector width starting at the base pointer.  If we found
   // consecutive loads for the low half, generate a vzext_load node.
   if (LastLoadedElt == NumElems - 1) {
+
+    if (isAfterLegalize &&
+        !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
+      return SDValue();
+
     SDValue NewLd = SDValue();
+
     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
       NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                           LDBase->getPointerInfo(),
@@ -6106,7 +6113,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDV
       V[i] = Op.getOperand(i);
 
     // Check for elements which are consecutive loads.
-    SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+    SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
     if (LD.getNode())
       return LD;
 
@@ -16379,7 +16386,7 @@ static SDValue PerformShuffleCombine(SDN
   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
     Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
 
-  return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+  return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
 }
 
 /// PerformTruncateCombine - Converts truncate operation to

Added: llvm/trunk/test/CodeGen/X86/v4i32load-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/v4i32load-crash.ll?rev=197492&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/v4i32load-crash.ll (added)
+++ llvm/trunk/test/CodeGen/X86/v4i32load-crash.ll Tue Dec 17 06:07:33 2013
@@ -0,0 +1,27 @@
+; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s
+
+;PR18045:
+;Issue of selection for 'v4i32 load'.
+;This instruction is not legal for X86 CPUs with sse < 'sse4.1'.
+;This node was generated by X86ISelLowering.cpp, EltsFromConsecutiveLoads
+;static function after legilize stage.
+
+ at e = external global [4 x i32], align 4
+ at f = external global [4 x i32], align 4
+
+; Function Attrs: nounwind
+define void @fn3(i32 %el) {
+entry:
+  %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
+  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
+  %4 = insertelement <4 x i32> undef, i32 %0, i32 0
+  %5 = insertelement <4 x i32> %4, i32 %1, i32 1
+  %6 = insertelement <4 x i32> %5, i32 %2, i32 2
+  %7 = insertelement <4 x i32> %6, i32 %3, i32 3
+  %8 = add <4 x i32> %6, %7
+  store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*)
+  ret void
+}
+