[PATCH] D105094: [PowerPC] Fix i64 to vector lowering on big endian
Qiu Chaofan via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 5 00:26:00 PDT 2021
qiucf updated this revision to Diff 356442.
qiucf added a comment.
Use a simpler test case and add comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D105094/new/
https://reviews.llvm.org/D105094
Files:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
Index: llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2561,3 +2561,36 @@
ret double %vecext
; FIXME: add check patterns when variable element extraction is implemented
}
+
+; To check when LHS is i32 to vector and RHS is i64 to vector,
+; the combination should be skipped properly.
+define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
+; CHECK-LABEL: buildi2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sldi r4, r4, 32
+; CHECK-NEXT: mtfprd f1, r3
+; CHECK-NEXT: mtfprd f0, r4
+; CHECK-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-NEXT: blr
+;
+; CHECK-LE-LABEL: buildi2:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: mtfprwz f0, r4
+; CHECK-LE-NEXT: mtfprd f1, r3
+; CHECK-LE-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-NEXT: blr
+;
+; CHECK-AIX-LABEL: buildi2:
+; CHECK-AIX: # %bb.0: # %entry
+; CHECK-AIX-NEXT: sldi 4, 4, 32
+; CHECK-AIX-NEXT: mtfprd 1, 3
+; CHECK-AIX-NEXT: mtfprd 0, 4
+; CHECK-AIX-NEXT: xxmrghd 34, 0, 1
+; CHECK-AIX-NEXT: blr
+entry:
+ %lhs.i32 = insertelement <4 x i32> undef, i32 %arg1, i32 0
+ %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+ %lhs = bitcast <4 x i32> %lhs.i32 to <2 x i64>
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14515,18 +14515,15 @@
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
: SToVRHS.getValueType().getVectorNumElements();
int NumEltsOut = ShuffV.size();
- unsigned InElemSizeInBits =
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits()
- : SToVRHS.getValueType().getScalarSizeInBits();
- unsigned OutElemSizeInBits = SToVLHS
- ? LHS.getValueType().getScalarSizeInBits()
- : RHS.getValueType().getScalarSizeInBits();
-
// The width of the "valid lane" (i.e. the lane that contains the value that
// is vectorized) needs to be expressed in terms of the number of elements
// of the shuffle. It is thereby the ratio of the values before and after
// any bitcast.
- unsigned ValidLaneWidth = InElemSizeInBits / OutElemSizeInBits;
+ unsigned ValidLaneWidth =
+ SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
+ LHS.getValueType().getScalarSizeInBits()
+ : SToVRHS.getValueType().getScalarSizeInBits() /
+ RHS.getValueType().getScalarSizeInBits();
// Initially assume that neither input is permuted. These will be adjusted
// accordingly if either input is.
@@ -14539,9 +14536,10 @@
// ISD::SCALAR_TO_VECTOR.
// On big endian systems, this only makes sense for element sizes smaller
// than 64 bits since for 64-bit elements, all instructions already put
- // the value into element zero.
+ // the value into element zero. Since scalar size of LHS and RHS may differ
+ // after isScalarToVec, this should be checked using their own sizes.
if (SToVLHS) {
- if (!IsLittleEndian && InElemSizeInBits >= 64)
+ if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
return Res;
// Set up the values for the shuffle vector fixup.
LHSMaxIdx = NumEltsOut / NumEltsIn;
@@ -14551,7 +14549,7 @@
LHS = SToVLHS;
}
if (SToVRHS) {
- if (!IsLittleEndian && InElemSizeInBits >= 64)
+ if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
return Res;
RHSMinIdx = NumEltsOut;
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D105094.356442.patch
Type: text/x-patch
Size: 4046 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210705/eec71923/attachment.bin>
More information about the llvm-commits
mailing list