[llvm] 9a368d2 - [X86][SSE] shuffle(hop,hop) - canonicalize unary hop(x,x) shuffle masks
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 10 08:09:39 PDT 2020
Author: Simon Pilgrim
Date: 2020-08-10T16:09:27+01:00
New Revision: 9a368d2b0088a8b7209c4a435427dfe8ad62744f
URL: https://github.com/llvm/llvm-project/commit/9a368d2b0088a8b7209c4a435427dfe8ad62744f
DIFF: https://github.com/llvm/llvm-project/commit/9a368d2b0088a8b7209c4a435427dfe8ad62744f.diff
LOG: [X86][SSE] shuffle(hop,hop) - canonicalize unary hop(x,x) shuffle masks
If a shuffle is referring to both the lower and upper half lanes of a unary horizontal op, then canonicalize the mask to only refer to the lower half.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/haddsub-undef.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0e665404ffbe..8ea98649d352 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35880,6 +35880,25 @@ static SDValue combineShuffleWithHorizOp(SDValue N, MVT VT, const SDLoc &DL,
if (!isHoriz && !isPack)
return SDValue();
+ // Canonicalize unary horizontal ops to only refer to lower halves.
+ if (TargetMask.size() == VT0.getVectorNumElements()) {
+ int NumElts = VT0.getVectorNumElements();
+ int NumLanes = VT0.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+ int NumHalfEltsPerLane = NumEltsPerLane / 2;
+ for (int i = 0; i != NumElts; ++i) {
+ int &M = TargetMask[i];
+ if (isUndefOrZero(M))
+ continue;
+ if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) &&
+ (M % NumEltsPerLane) >= NumHalfEltsPerLane)
+ M -= NumHalfEltsPerLane;
+ if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) &&
+ ((M - NumElts) % NumEltsPerLane) >= NumHalfEltsPerLane)
+ M -= NumHalfEltsPerLane;
+ }
+ }
+
SmallVector<int, 16> TargetMask128, WideMask128;
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
scaleShuffleElements(TargetMask128, 2, WideMask128)) {
diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll
index b5f0d48dbe38..f950d0b6a723 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -1015,9 +1015,7 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) {
;
; SSE-FAST-LABEL: PR34724_add_v4f32_0u23:
; SSE-FAST: # %bb.0:
-; SSE-FAST-NEXT: haddps %xmm0, %xmm0
-; SSE-FAST-NEXT: haddps %xmm1, %xmm1
-; SSE-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,3]
+; SSE-FAST-NEXT: haddps %xmm1, %xmm0
; SSE-FAST-NEXT: retq
;
; AVX-SLOW-LABEL: PR34724_add_v4f32_0u23:
@@ -1034,9 +1032,7 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) {
;
; AVX-FAST-LABEL: PR34724_add_v4f32_0u23:
; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vhaddps %xmm1, %xmm1, %xmm1
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,3]
+; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; AVX-FAST-NEXT: retq
%3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%4 = fadd <4 x float> %3, %0
More information about the llvm-commits
mailing list