[llvm] d79e7dc - [DAG] SimplifyDemandedVectorElts - and/mul(x, y) - if a demanded element of y is known zero then we don't need to demand it in x
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 9 08:27:49 PDT 2022
Author: Simon Pilgrim
Date: 2022-08-09T16:24:08+01:00
New Revision: d79e7dc939a3e9c422ecb4f7de17ca7a6d827ba7
URL: https://github.com/llvm/llvm-project/commit/d79e7dc939a3e9c422ecb4f7de17ca7a6d827ba7
DIFF: https://github.com/llvm/llvm-project/commit/d79e7dc939a3e9c422ecb4f7de17ca7a6d827ba7.diff
LOG: [DAG] SimplifyDemandedVectorElts - and/mul(x,y) - if a demanded element of y is known zero then we don't need to demand it in x
This fixes most of the remaining regressions from the fixes in rG293899c64b75
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5bfb91a1271e5..afdbbd83b4ee9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3389,10 +3389,16 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
+ // If we know that a demanded element was zero in Op1 we don't need to
+ // demand it in Op0 - its guaranteed to be zero.
+ APInt DemandedElts0 = DemandedElts & ~SrcZero;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
TLO, Depth + 1))
return true;
+ KnownUndef &= DemandedElts0;
+ KnownZero &= DemandedElts0;
+
// If every element pair has a zero/undef then just fold to zero.
// fold (and x, undef) -> 0 / (and x, 0) -> 0
// fold (mul x, undef) -> 0 / (mul x, 0) -> 0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 01f43412807ea..dc595801d74b5 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -552,7 +552,6 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
-; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
@@ -571,7 +570,6 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
-; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index e0a67bbf67fd6..bfea592ecfb80 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -304,7 +304,6 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
-; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
@@ -322,7 +321,6 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index f4531868fdd1e..e9dd400c1f78a 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -547,38 +547,36 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
-; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; AVX512F-NEXT: vpsllw $1, %ymm5, %ymm5
-; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5
+; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
+; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3
+; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512F-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
-; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; AVX512VL-NEXT: vpsllw $1, %ymm5, %ymm5
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5
+; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1
+; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3
; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VL-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
+; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 31c856f5b887b..3a533f6bc20fd 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -295,37 +295,35 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
-; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
-; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm5
-; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm5
-; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm6
-; AVX512F-NEXT: vpsllw %xmm3, %ymm6, %ymm3
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
-; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm4, %ymm2
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpsllw $1, %ymm4, %ymm2
+; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
-; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
-; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm5
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm5
-; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm6
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm6, %ymm3
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm4, %ymm2
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpsllw $1, %ymm4, %ymm2
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2
+; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0
+; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index b7e69e4ac4c5d..4402957b4d74f 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -317,7 +317,6 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512F-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
-; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0
@@ -335,7 +334,6 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
; AVX512VL-NEXT: vpsrlw $1, %ymm0, %ymm6
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm6, %ymm3
; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3
-; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpsllw %xmm1, %ymm4, %ymm2
; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
More information about the llvm-commits mailing list