[llvm] a22ecb4 - [PowerPC] Fix i64 to vector lowering on big endian

Wed Jul 7 20:06:25 PDT 2021

Author: Qiu Chaofan
Date: 2021-07-08T11:05:09+08:00
New Revision: a22ecb4508288f6900ad9216ef1490ab72ad68ed

URL: https://github.com/llvm/llvm-project/commit/a22ecb4508288f6900ad9216ef1490ab72ad68ed
DIFF: https://github.com/llvm/llvm-project/commit/a22ecb4508288f6900ad9216ef1490ab72ad68ed.diff

LOG: [PowerPC] Fix i64 to vector lowering on big endian

Lowering for scalar to vector is skipped if the current subtarget is big
endian and the scalar is larger than or equal to 64 bits. However, there
was an issue in the implementation: SToVRHS could refer to SToVLHS's scalar
size if SToVLHS was present, which led to a crash.

Reviewed By: nemanjai, shchenz

Differential Revision: https://reviews.llvm.org/D105094

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 99b8cd5d20d3..39cf24b00ac3 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14524,18 +14524,15 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                             : SToVRHS.getValueType().getVectorNumElements();
     int NumEltsOut = ShuffV.size();
-    unsigned InElemSizeInBits =
-        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
-                : SToVRHS.getValueType().getScalarSizeInBits();
-    unsigned OutElemSizeInBits = SToVLHS
-                                     ? LHS.getValueType().getScalarSizeInBits()
-                                     : RHS.getValueType().getScalarSizeInBits();
-
     // The width of the "valid lane" (i.e. the lane that contains the value that
     // is vectorized) needs to be expressed in terms of the number of elements
     // of the shuffle. It is thereby the ratio of the values before and after
     // any bitcast.
-    unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
+    unsigned ValidLaneWidth =
+        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
+                      LHS.getValueType().getScalarSizeInBits()
+                : SToVRHS.getValueType().getScalarSizeInBits() /
+                      RHS.getValueType().getScalarSizeInBits();
 
     // Initially assume that neither input is permuted. These will be adjusted
     // accordingly if either input is.
@@ -14548,9 +14545,10 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
     // ISD::SCALAR_TO_VECTOR.
     // On big endian systems, this only makes sense for element sizes smaller
     // than 64 bits since for 64-bit elements, all instructions already put
-    // the value into element zero.
+    // the value into element zero. Since scalar size of LHS and RHS may differ
+    // after isScalarToVec, this should be checked using their own sizes.
     if (SToVLHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       // Set up the values for the shuffle vector fixup.
       LHSMaxIdx = NumEltsOut / NumEltsIn;
@@ -14560,7 +14558,7 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
       LHS = SToVLHS;
     }
     if (SToVRHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       RHSMinIdx = NumEltsOut;
       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;

diff  --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
index 6a251e076005..27ee2fda1f1b 100644
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2561,3 +2561,36 @@ entry:
   ret double %vecext
 ; FIXME: add check patterns when variable element extraction is implemented
 }
+
+; To check when LHS is i32 to vector and RHS is i64 to vector,
+; the combination should be skipped properly.
+define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
+; CHECK-LABEL: buildi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 32
+; CHECK-NEXT:    mtfprd f1, r3
+; CHECK-NEXT:    mtfprd f0, r4
+; CHECK-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildi2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtfprwz f0, r4
+; CHECK-LE-NEXT:    mtfprd f1, r3
+; CHECK-LE-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildi2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    sldi 4, 4, 32
+; CHECK-AIX-NEXT:    mtfprd 1, 3
+; CHECK-AIX-NEXT:    mtfprd 0, 4
+; CHECK-AIX-NEXT:    xxmrghd 34, 0, 1
+; CHECK-AIX-NEXT:    blr
+entry:
+  %lhs.i32 = insertelement <4 x i32> undef, i32 %arg1, i32 0
+  %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+  %lhs = bitcast <4 x i32> %lhs.i32 to <2 x i64>
+  %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle
+}