[PATCH] D105094: [PowerPC] Fix i64 to vector lowering on big endian

Mon Jul 5 00:26:00 PDT 2021

qiucf updated this revision to Diff 356442.
qiucf added a comment.

Use a simpler test case and add comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105094/new/

https://reviews.llvm.org/D105094

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll


Index: llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
===================================================================

--- llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2561,3 +2561,36 @@
   ret double %vecext
 ; FIXME: add check patterns when variable element extraction is implemented
 }
+
+; Check that when the LHS is an i32-to-vector and the RHS is an i64-to-vector,
+; the combination is skipped properly.
+define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
+; CHECK-LABEL: buildi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sldi r4, r4, 32
+; CHECK-NEXT:    mtfprd f1, r3
+; CHECK-NEXT:    mtfprd f0, r4
+; CHECK-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-NEXT:    blr
+;
+; CHECK-LE-LABEL: buildi2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mtfprwz f0, r4
+; CHECK-LE-NEXT:    mtfprd f1, r3
+; CHECK-LE-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-AIX-LABEL: buildi2:
+; CHECK-AIX:       # %bb.0: # %entry
+; CHECK-AIX-NEXT:    sldi 4, 4, 32
+; CHECK-AIX-NEXT:    mtfprd 1, 3
+; CHECK-AIX-NEXT:    mtfprd 0, 4
+; CHECK-AIX-NEXT:    xxmrghd 34, 0, 1
+; CHECK-AIX-NEXT:    blr
+entry:
+  %lhs.i32 = insertelement <4 x i32> undef, i32 %arg1, i32 0
+  %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+  %lhs = bitcast <4 x i32> %lhs.i32 to <2 x i64>
+  %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64> %shuffle
+}
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14515,18 +14515,15 @@
     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                             : SToVRHS.getValueType().getVectorNumElements();
     int NumEltsOut = ShuffV.size();
-    unsigned InElemSizeInBits =
-        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
-                : SToVRHS.getValueType().getScalarSizeInBits();
-    unsigned OutElemSizeInBits = SToVLHS
-                                     ? LHS.getValueType().getScalarSizeInBits()
-                                     : RHS.getValueType().getScalarSizeInBits();
-
     // The width of the "valid lane" (i.e. the lane that contains the value that
     // is vectorized) needs to be expressed in terms of the number of elements
     // of the shuffle. It is thereby the ratio of the values before and after
     // any bitcast.
-    unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
+    unsigned ValidLaneWidth =
+        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
+                      LHS.getValueType().getScalarSizeInBits()
+                : SToVRHS.getValueType().getScalarSizeInBits() /
+                      RHS.getValueType().getScalarSizeInBits();
 
     // Initially assume that neither input is permuted. These will be adjusted
     // accordingly if either input is.
@@ -14539,9 +14536,10 @@
     // ISD::SCALAR_TO_VECTOR.
     // On big endian systems, this only makes sense for element sizes smaller
     // than 64 bits since for 64-bit elements, all instructions already put
-    // the value into element zero.
+    // the value into element zero. Since the scalar sizes of LHS and RHS may
+    // differ after isScalarToVec, each side is checked using its own size.
     if (SToVLHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       // Set up the values for the shuffle vector fixup.
       LHSMaxIdx = NumEltsOut / NumEltsIn;
@@ -14551,7 +14549,7 @@
       LHS = SToVLHS;
     }
     if (SToVRHS) {
-      if (!IsLittleEndian && InElemSizeInBits >= 64)
+      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       RHSMinIdx = NumEltsOut;
       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D105094.356442.patch
Type: text/x-patch
Size: 4046 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210705/eec71923/attachment.bin>