[llvm] 588d97e - [X86] getTargetVShiftNode - peek through any zext node
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 4 09:42:01 PST 2022
Author: Simon Pilgrim
Date: 2022-03-04T17:41:45Z
New Revision: 588d97e2461964ac5b821b946276c5600e0139a3
URL: https://github.com/llvm/llvm-project/commit/588d97e2461964ac5b821b946276c5600e0139a3
DIFF: https://github.com/llvm/llvm-project/commit/588d97e2461964ac5b821b946276c5600e0139a3.diff
LOG: [X86] getTargetVShiftNode - peek through any zext node
If the shift amount has been zero-extended, peek through the extension, as this might help us further canonicalize the shift amount.

Fixes the regression mentioned in rG147cfcbef1255ba2b4875b76708dab1a685085f5
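For illustration, here is a minimal IR sketch (not part of the commit; the function name is made up) of the kind of pattern this targets, loosely modeled on the PR52719 test updated below: the splatted i32 shift amount is zero-extended to vXi64, and the lowering can now peek through that zext back to the 128-bit v4i32 source.

; Splat a scalar i32 amount, zero-extend it to <4 x i64>, then shift.
define <4 x i64> @ashr_zext_splat_amt(<4 x i64> %a0, i32 %a1) {
  %ins = insertelement <4 x i32> poison, i32 %a1, i64 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  ; The zext operand is a 128-bit v4i32 vector, so getTargetVShiftNode can
  ; peek through the extension and use %splat directly as the shift amount.
  %amt = zext <4 x i32> %splat to <4 x i64>
  %shr = ashr <4 x i64> %a0, %amt
  ret <4 x i64> %shr
}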
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 12b9195e11e20..c8371d0699bda 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25744,6 +25744,16 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
     ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask);
   }
 
+  // Peek through any zext node if we can get back to a 128-bit source.
+  if (AmtVT.getScalarSizeInBits() == 64 &&
+      (ShAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+      ShAmt.getOperand(0).getValueType().isSimple() &&
+      ShAmt.getOperand(0).getValueType().is128BitVector()) {
+    ShAmt = ShAmt.getOperand(0);
+    AmtVT = ShAmt.getSimpleValueType();
+  }
+
   // See if we can mask off the upper elements using the existing source node.
   // The shift uses the entire lower 64-bits of the amount vector, so no need to
   // do this for vXi64 types.
@@ -25784,10 +25794,13 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
   // Zero-extend bottom element to v2i64 vector type, either by extension or
   // shuffle masking.
   if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) {
-    if (Subtarget.hasSSE41())
+    if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST ||
+                                ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) {
+      ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+    } else if (Subtarget.hasSSE41()) {
       ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                           MVT::v2i64, ShAmt);
-    else {
+    } else {
       SDValue ByteShift = DAG.getTargetConstant(
           (128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
       ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 53e5fa4e1f69b..e9a970157b053 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2105,18 +2105,16 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; AVX1-LABEL: PR52719:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
-; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT: vpsrlq %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsubq %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR52719:
@@ -2170,28 +2168,23 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
;
; X86-AVX1-LABEL: PR52719:
; X86-AVX1: # %bb.0:
-; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; X86-AVX1-NEXT: # xmm3 = mem[0,0]
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm2
-; X86-AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: # xmm2 = mem[0,0]
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: PR52719:
; X86-AVX2: # %bb.0:
-; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1
-; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0