[llvm] ae8f929 - [AArch64] Use known zero bits when creating BIC

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 5 07:42:41 PDT 2023


Author: David Green
Date: 2023-07-05T15:42:33+01:00
New Revision: ae8f929b93e685dcb0bf6d6d1999bf70758917f5

URL: https://github.com/llvm/llvm-project/commit/ae8f929b93e685dcb0bf6d6d1999bf70758917f5
DIFF: https://github.com/llvm/llvm-project/commit/ae8f929b93e685dcb0bf6d6d1999bf70758917f5.diff

LOG: [AArch64] Use known zero bits when creating BIC

If we know bits are already 0, we will not need to clear them again with a BIC.
So we can use KnownBits to shrink the size of the constant when creating a BIC
from an AND, potentially undoing the known-bits folds that happen during
compilation.

BIC only has a single register operand for input and output, so it has less
scheduling freedom than an AND, but it usually saves the materialization of a
constant.

Differential Revision: https://reviews.llvm.org/D154217

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
    llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
    llvm/test/CodeGen/AArch64/vec_uaddo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index db55853bcdbee2..4ce0a4e6b9ed90 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17135,14 +17135,22 @@ static SDValue performANDCombine(SDNode *N,
   if (resolveBuildVector(BVN, DefBits, UndefBits)) {
     SDValue NewOp;
 
-    DefBits = ~DefBits;
+    // Any bits known to already be 0 need not be cleared again, which can help
+    // reduce the size of the immediate to one supported by the instruction.
+    KnownBits Known = DAG.computeKnownBits(LHS);
+    APInt ZeroSplat(VT.getSizeInBits(), 0);
+    for (unsigned I = 0; I < VT.getSizeInBits() / Known.Zero.getBitWidth(); I++)
+      ZeroSplat |= Known.Zero.zext(VT.getSizeInBits())
+                   << (Known.Zero.getBitWidth() * I);
+
+    DefBits = ~(DefBits | ZeroSplat);
     if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
                                     DefBits, &LHS)) ||
         (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
                                     DefBits, &LHS)))
       return NewOp;
 
-    UndefBits = ~UndefBits;
+    UndefBits = ~(UndefBits | ZeroSplat);
     if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
                                     UndefBits, &LHS)) ||
         (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,

diff  --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 6fc392a0aa563d..50f38b1744ef5c 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1483,9 +1483,8 @@ define <2 x i64> @and64imm8h_lsl8(<2 x i64> %a) {
 define <8 x i16> @bic_shifted_knownbits(<8 x i16> %v) {
 ; CHECK-LABEL: bic_shifted_knownbits:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.8h, #1
 ; CHECK-NEXT:    ushr v0.8h, v0.8h, #9
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    bic v0.8h, #126
 ; CHECK-NEXT:    ret
 entry:
   %vshr_n = lshr <8 x i16> %v, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -1496,12 +1495,10 @@ entry:
 define <8 x i32> @bic_shifted_knownbits2(<8 x i16> %v) {
 ; CHECK-LABEL: bic_shifted_knownbits2:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #-1048321 // =0xfff000ff
 ; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    dup v2.4s, w8
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    bic v1.4s, #255, lsl #8
+; CHECK-NEXT:    bic v0.4s, #255, lsl #8
 ; CHECK-NEXT:    ret
 entry:
   %vshr_n = zext <8 x i16> %v to <8 x i32>
@@ -1525,11 +1522,10 @@ define <8 x i32> @bic_shifted_knownbits3(<8 x i16> %v) {
 define <8 x i32> @bic_shifted_knownbits4(<8 x i32> %v) {
 ; CHECK-LABEL: bic_shifted_knownbits4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v2.2d, #0xffff0000ffff0000
 ; CHECK-NEXT:    shl v0.4s, v0.4s, #8
 ; CHECK-NEXT:    shl v1.4s, v1.4s, #8
-; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    bic v0.4s, #255, lsl #8
+; CHECK-NEXT:    bic v1.4s, #255, lsl #8
 ; CHECK-NEXT:    ret
 entry:
   %vshr_n = shl <8 x i32> %v, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>

diff  --git a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
index ec4e3b3e42b7f9..2f3193656f18a3 100644
--- a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
+++ b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
@@ -157,7 +157,7 @@ define <2 x i32> @shiftedreg_from_and_negative_type(<2 x i32> %a, <2 x i32> %b)
 ; CHECK-LABEL: shiftedreg_from_and_negative_type:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v0.2s, v0.2s, #2
-; CHECK-NEXT:    bic v0.2s, #31
+; CHECK-NEXT:    bic v0.2s, #28
 ; CHECK-NEXT:    sub v0.2s, v1.2s, v0.2s
 ; CHECK-NEXT:    ret
   %shl = shl <2 x i32> %a, <i32 2, i32 2>

diff  --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
index b660d9e16c4989..cf4f6095f4b179 100644
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -218,7 +218,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
 ; CHECK-NEXT:    mov w10, v0.s[1]
 ; CHECK-NEXT:    fmov w11, s0
 ; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    bic v1.4s, #255, lsl #24
+; CHECK-NEXT:    bic v1.4s, #1, lsl #24
 ; CHECK-NEXT:    sturh w8, [x0, #9]
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    cmeq v1.4s, v1.4s, v0.4s
@@ -251,12 +251,13 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
 ; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
 ; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    shl v1.4h, v0.4h, #15
-; CHECK-NEXT:    and v2.8b, v0.8b, v2.8b
-; CHECK-NEXT:    cmeq v0.4h, v2.4h, v0.4h
-; CHECK-NEXT:    cmlt v1.4h, v1.4h, #0
-; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    fmov d1, d0
+; CHECK-NEXT:    shl v2.4h, v0.4h, #15
+; CHECK-NEXT:    bic v1.4h, #2
+; CHECK-NEXT:    cmeq v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    cmlt v1.4h, v2.4h, #0
 ; CHECK-NEXT:    and v1.8b, v1.8b, v3.8b
+; CHECK-NEXT:    mvn v0.8b, v0.8b
 ; CHECK-NEXT:    addv h1, v1.4h
 ; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-NEXT:    fmov w8, s1


        


More information about the llvm-commits mailing list