[llvm] 057db20 - [X86] combineAndnp - constant fold ANDNP(C, X) -> AND(~C, X)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 21 04:31:15 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-21T12:31:01+01:00
New Revision: 057db2002bb3d79429db3c5fe436c8cefc50cb25
URL: https://github.com/llvm/llvm-project/commit/057db2002bb3d79429db3c5fe436c8cefc50cb25
DIFF: https://github.com/llvm/llvm-project/commit/057db2002bb3d79429db3c5fe436c8cefc50cb25.diff
LOG: [X86] combineAndnp - constant fold ANDNP(C,X) -> AND(~C,X)
If the LHS op has a single use, then using the more general AND op is likely to allow commutation, load folding, generic folds, etc.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-udiv.ll
llvm/test/CodeGen/X86/insert-into-constant-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2c192db9688c..70cd94e58c07 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50965,6 +50965,20 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
if (SDValue Not = IsNOT(N0, DAG))
return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
+ // Constant fold NOT(N0) to allow us to use AND.
+ // TODO: Do this in IsNOT with suitable oneuse checks?
+ if (getTargetConstantFromNode(N0) && N0->hasOneUse()) {
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (getTargetConstantBitsFromNode(N0, VT.getScalarSizeInBits(), UndefElts,
+ EltBits)) {
+ for (APInt &Elt : EltBits)
+ Elt = ~Elt;
+ SDValue Not = getConstVector(EltBits, UndefElts, VT, DAG, SDLoc(N));
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, Not, N1);
+ }
+ }
+
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll
index f657430239b8..6c0401624243 100644
--- a/llvm/test/CodeGen/X86/combine-udiv.ll
+++ b/llvm/test/CodeGen/X86/combine-udiv.ll
@@ -690,21 +690,20 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
define <8 x i16> @pr38477(<8 x i16> %a0) {
; SSE2-LABEL: pr38477:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4957,57457,4103,16385,35545,2048,2115]
-; SSE2-NEXT: pmulhuw %xmm0, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: psubw %xmm2, %xmm0
-; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,0,65535]
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pandn %xmm0, %xmm3
-; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: por %xmm3, %xmm1
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4957,57457,4103,16385,35545,2048,2115]
+; SSE2-NEXT: pmulhuw %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubw %xmm1, %xmm2
+; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm2, %xmm3
+; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: pr38477:
diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
index 749ca979b491..7fc1c173ba90 100644
--- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
+++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll
@@ -13,17 +13,15 @@
define <16 x i8> @elt0_v16i8(i8 %x) {
; X86-SSE2-LABEL: elt0_v16i8:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE2-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X86-SSE2-NEXT: andnps %xmm1, %xmm0
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT: retl
;
; X64-SSE2-LABEL: elt0_v16i8:
; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: movd %edi, %xmm1
-; X64-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X64-SSE2-NEXT: pandn %xmm1, %xmm0
+; X64-SSE2-NEXT: movd %edi, %xmm0
+; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT: retq
;
More information about the llvm-commits
mailing list