[llvm] r357644 - [x86] fold shuffles of h-ops that have an undef operand
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 3 15:40:35 PDT 2019
Author: spatel
Date: Wed Apr 3 15:40:35 2019
New Revision: 357644
URL: http://llvm.org/viewvc/llvm-project?rev=357644&view=rev
Log:
[x86] fold shuffles of h-ops that have an undef operand
If either operand of the horizontal op is undef, we can assume it is
the same as the other operand, so the shuffle fold that requires
identical operands still applies.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=357644&r1=357643&r2=357644&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Apr 3 15:40:35 2019
@@ -32880,8 +32880,8 @@ static SDValue foldShuffleOfHorizOp(SDNo
// lanes of each operand as:
// v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
// ...similarly for v2f64 and v8i16.
- // TODO: Handle UNDEF operands.
- if (HOp.getOperand(0) != HOp.getOperand(1))
+ if (!HOp.getOperand(0).isUndef() && !HOp.getOperand(1).isUndef() &&
+ HOp.getOperand(0) != HOp.getOperand(1))
return SDValue();
// When the operands of a horizontal math op are identical, the low half of
Modified: llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll?rev=357644&r1=357643&r2=357644&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll (original)
+++ llvm/trunk/test/CodeGen/X86/haddsub-shuf.ll Wed Apr 3 15:40:35 2019
@@ -381,7 +381,6 @@ define <4 x double> @hadd_v4f64_scalar_s
; AVX-LABEL: hadd_v4f64_scalar_splat:
; AVX: # %bb.0:
; AVX-NEXT: vhaddpd %ymm0, %ymm0, %ymm0
-; AVX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX-NEXT: retq
%a0 = extractelement <4 x double> %a, i32 0
%a1 = extractelement <4 x double> %a, i32 1
More information about the llvm-commits mailing list