[llvm-commits] [llvm] r145921 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/avx-vperm2f128.ll test/CodeGen/X86/avx2-vperm2i128.ll
Craig Topper
craig.topper at gmail.com
Mon Dec 5 20:59:07 PST 2011
Author: ctopper
Date: Mon Dec 5 22:59:07 2011
New Revision: 145921
URL: http://llvm.org/viewvc/llvm-project?rev=145921&view=rev
Log:
Merge isSHUFPMask and isCommutedSHUFPMask into a single function that can do both, and do the same for the 256-bit version. Use loops to reduce the size of isVSHUFPYMask. Fix test cases that were incorrectly passing because isCommutedSHUFPMask did not check that the vector was 128-bit; this caused some 256-bit shuffles to be incorrectly commuted.
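The merged predicate folds the commuted check into the normal one by flipping which source each half of the mask must draw from. Below is a minimal standalone sketch of the 128-bit case, assuming a plain std::vector<int> mask in which -1 marks an undef element, indices 0..NumElems-1 select from the first source, and NumElems..2*NumElems-1 from the second; the *Model names are hypothetical stand-ins for the SmallVectorImpl/EVT-based originals:

#include <vector>

// Model of isUndefOrInRange: undef (-1) always matches; otherwise the
// index must fall in the half-open range [Low, Hi).
static bool isUndefOrInRangeModel(int Val, unsigned Low, unsigned Hi) {
  return Val < 0 || (static_cast<unsigned>(Val) >= Low &&
                     static_cast<unsigned>(Val) < Hi);
}

// Model of the merged isSHUFPMask: with Commuted = false the low half of
// the mask must read from source 1 and the high half from source 2;
// Commuted = true swaps which source each half is checked against.
static bool isSHUFPMaskModel(const std::vector<int> &Mask,
                             bool Commuted = false) {
  unsigned NumElems = Mask.size();
  if (NumElems != 2 && NumElems != 4)
    return false;
  unsigned Half = NumElems / 2;
  unsigned SrcStart = Commuted ? NumElems : 0;
  for (unsigned i = 0; i != Half; ++i)
    if (!isUndefOrInRangeModel(Mask[i], SrcStart, SrcStart + NumElems))
      return false;
  SrcStart = Commuted ? 0 : NumElems;
  for (unsigned i = Half; i != NumElems; ++i)
    if (!isUndefOrInRangeModel(Mask[i], SrcStart, SrcStart + NumElems))
      return false;
  return true;
}

With Commuted = false this accepts, e.g., {0, 1, 4, 5}; with Commuted = true it accepts the swapped {4, 5, 0, 1}, which the caller then normalizes via CommuteVectorShuffle. Wrapping the source-range choice in a flag is what lets the two old functions collapse into one.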
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll
llvm/trunk/test/CodeGen/X86/avx2-vperm2i128.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=145921&r1=145920&r2=145921&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 5 22:59:07 2011
@@ -3251,7 +3251,7 @@
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// VSHUFPSY.
static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool HasAVX) {
+ bool HasAVX, bool Commuted = false) {
int NumElems = VT.getVectorNumElements();
if (!HasAVX || VT.getSizeInBits() != 256)
@@ -3279,114 +3279,27 @@
//
// DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
-
- // For VSHUFPSY, the mask of the second half must be the same as the first
- // but with the appropriate offsets. This works in the same way as
- // VPERMILPS works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- if (NumElems == 4)
- continue;
- // VSHUFPSY handling
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (NumElems == 4)
- continue;
- // VSHUFPSY handling
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
-
- return true;
-}
-
-/// isCommutedVSHUFP() - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool HasAVX) {
- int NumElems = VT.getVectorNumElements();
-
- if (!HasAVX || VT.getSizeInBits() != 256)
- return false;
-
- if (NumElems != 4 && NumElems != 8)
- return false;
-
- // VSHUFPSY divides the resulting vector into 4 chunks.
- // The sources are also split into 4 chunks, and each destination
- // chunk must come from a different source chunk.
- //
- // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
- // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
- //
- // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
- // Y3..Y0, Y3..Y0, X3..X0, X3..X0
- //
- // VSHUFPDY divides the resulting vector into 4 chunks.
- // The sources are also split into 4 chunks, and each destination
- // chunk must come from a different source chunk.
- //
- // SRC1 => X3 X2 X1 X0
- // SRC2 => Y3 Y2 Y1 Y0
- //
- // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
- //
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
-
- // For VSHUFPSY, the mask of the second half must be the same as the first
- // but with the appropriate offsets. This works in the same way as
- // VPERMILPS works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- if (NumElems == 4)
- continue;
- // VSHUFPSY handling
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- if (NumElems == 4)
- continue;
- // VSHUFPSY handling
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
+ unsigned QuarterSize = NumElems/4;
+ unsigned HalfSize = QuarterSize*2;
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned LaneStart = l*HalfSize;
+ for (unsigned s = 0; s != 2; ++s) {
+ unsigned QuarterStart = s*QuarterSize;
+ unsigned Src = (Commuted) ? (1-s) : s;
+ unsigned SrcStart = Src*NumElems + LaneStart;
+ for (unsigned i = 0; i != QuarterSize; ++i) {
+ int Idx = Mask[i+QuarterStart+LaneStart];
+ if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the first
+ // but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize))
+ return false;
+ }
+ }
}
return true;
@@ -3436,9 +3349,11 @@
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// SHUFPS and SHUFPD.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
+/// SHUFPS and SHUFPD. If Commuted is true, it checks whether the sources are
+/// reversed from what x86 shuffles want.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool Commuted = false) {
+ unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return false;
@@ -3446,12 +3361,14 @@
if (NumElems != 2 && NumElems != 4)
return false;
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ unsigned Half = NumElems / 2;
+ unsigned SrcStart = Commuted ? NumElems : 0;
+ for (unsigned i = 0; i != Half; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
+ return false;
+ SrcStart = Commuted ? 0 : NumElems;
+ for (unsigned i = Half; i != NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
return true;
@@ -3463,26 +3380,6 @@
return ::isSHUFPMask(M, N->getValueType(0));
}
-/// isCommutedSHUFPMask - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- return true;
-}
-
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
@@ -6780,8 +6677,8 @@
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isCommutedSHUFPMask(M, VT) ||
- isCommutedVSHUFPYMask(M, VT, HasAVX)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) ||
+ isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -11272,7 +11169,7 @@
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+ isSHUFPMask(Mask, VT, /* Commuted */ true));
}
return false;
}
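The immediate changes in the tests below fall out of no longer commuting these shuffles. VPERM2F128's imm8 selects each 128-bit half of the destination independently: bits [1:0] pick the low half and bits [5:4] the high half, where 0/1 name the low/high half of the first source and 2/3 the low/high half of the second. A small sketch (helper name hypothetical, not from this patch) working through the E2 mask <6, 7, 0, 1>:

#include <cassert>

// Derive the VPERM2F128 immediate for a <4 x i64> shuffle whose mask moves
// whole 128-bit halves. Shuffle indices 0-3 name elements of the first
// source and 4-7 elements of the second, so dividing an index by the half
// width (2) yields exactly the selector the instruction wants:
// 0 = src1 low, 1 = src1 high, 2 = src2 low, 3 = src2 high.
static unsigned vperm2f128Imm(const int Mask[4]) {
  unsigned LoSel = Mask[0] / 2; // imm8 bits [1:0]
  unsigned HiSel = Mask[2] / 2; // imm8 bits [5:4]
  return (HiSel << 4) | LoSel;
}

int main() {
  // E2 takes the second source's high half, then the first's low half.
  int Mask[4] = {6, 7, 0, 1};
  assert(vperm2f128Imm(Mask) == 0x03); // emitted as "vperm2f128 $3"

  // Commuting the operands relabels every index (i becomes i ^ 4), which
  // is the form the old, over-eager isCommutedSHUFPMask steered this to.
  int CommutedMask[4] = {2, 3, 4, 5};
  assert(vperm2f128Imm(CommutedMask) == 0x21); // "vperm2f128 $33"
  return 0;
}

The two encodings move the same data once the operand swap is taken into account; the point of the fix is that the 256-bit case is no longer commuted when it does not need to be. The per-function labels added below (; CHECK: _A and friends) keep each vperm2f128 check anchored to its own function's output, so a check can no longer pass by matching an instruction from a neighboring function.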
Modified: llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll?rev=145921&r1=145920&r2=145921&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vperm2f128.ll Mon Dec 5 22:59:07 2011
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: _A
; CHECK: vperm2f128 $1
define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -7,6 +8,7 @@
ret <8 x float> %shuffle
}
+; CHECK: _B
; CHECK: vperm2f128 $48
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -14,6 +16,7 @@
ret <8 x float> %shuffle
}
+; CHECK: _C
; CHECK: vperm2f128 $0
define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -21,6 +24,7 @@
ret <8 x float> %shuffle
}
+; CHECK: _D
; CHECK: vperm2f128 $17
define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -28,6 +32,7 @@
ret <8 x float> %shuffle
}
+; CHECK: _E
; CHECK: vperm2f128 $17
define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
entry:
@@ -35,7 +40,8 @@
ret <32 x i8> %shuffle
}
-; CHECK: vperm2f128 $33
+; CHECK: _E2
+; CHECK: vperm2f128 $3
define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@@ -44,6 +50,7 @@
;;;; Cases with undef indices mixed in the mask
+; CHECK: _F
; CHECK: vperm2f128 $33
define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
Modified: llvm/trunk/test/CodeGen/X86/avx2-vperm2i128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vperm2i128.ll?rev=145921&r1=145920&r2=145921&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vperm2i128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vperm2i128.ll Mon Dec 5 22:59:07 2011
@@ -9,7 +9,7 @@
ret <32 x i8> %shuffle
}
-; CHECK: vperm2i128 $33
+; CHECK: vperm2i128 $3
define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
; add forces execution domain