[llvm] 34d18fd - [AArch64] Enhance bit-field-positioning op matcher to see through 'any_extend' for pattern 'and(any_extend(shl(val, N)), shifted-mask)'

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 18 09:08:05 PDT 2022


Author: Mingming Liu
Date: 2022-10-18T09:07:14-07:00
New Revision: 34d18fd241abafdf0436cbceebdeff2ecf685ae2

URL: https://github.com/llvm/llvm-project/commit/34d18fd241abafdf0436cbceebdeff2ecf685ae2
DIFF: https://github.com/llvm/llvm-project/commit/34d18fd241abafdf0436cbceebdeff2ecf685ae2.diff

LOG: [AArch64] Enhance bit-field-positioning op matcher to see through 'any_extend' for pattern 'and(any_extend(shl(val, N)), shifted-mask)'

Before this patch (and the refactoring patch D135843), isBitfieldPositioningOp did not handle "and(any_extend(shl(val, N)), shifted-mask)"; it bailed out whenever the AND operand was not a SHL.

After this patch, isBitfieldPositioningOp sees through the "any_extend" to find the inner "shl", and thereby recognizes more bit-field-positioning nodes.
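
As an illustration, here is a minimal IR sketch of an input that now matches (it mirrors the added test case; the function and value names are made up):

  define i64 @shift_then_mask(i64 %val) {
    %shifted = shl i64 %val, 7
    %masked = and i64 %shifted, 32640 ; 0x7f80, eight ones starting at bit 7
    ret i64 %masked
  }

Because the 'and' demands only the low bits, the DAG combiner narrows the i64 'shl' to an i32 'shl' wrapped in an 'any_extend', yielding exactly the "and(any_extend(shl(val, N)), shifted-mask)" shape above.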

https://gcc.godbolt.org/z/3ncGKbGW6 shows a four-line LLVM IR function that can now be optimized to a single UBFIZ (see the added test case test_and_extended_shift_with_imm in llvm/test/CodeGen/AArch64/bitfield-insert.ll). One existing test case is also improved.
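
For that input, the matcher's computation works out as follows (a worked example using the constants from the test):

  NonZeroBits = 0x7f80
  DstLSB = countTrailingZeros(0x7f80) = 7
  Width  = countTrailingOnes(0x7f80 >> 7) = countTrailingOnes(0xff) = 8

Since ShlImm (7) equals DstLSB, no extra shift is needed, and the whole expression selects to a single 'ubfiz x0, x0, #7, #8'.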

Differential Revision: https://reviews.llvm.org/D135852

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/test/CodeGen/AArch64/bitfield-insert.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 629b2403470dc..844f9c0c7159a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2580,17 +2580,54 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
   SDValue AndOp0 = Op.getOperand(0);
 
   uint64_t ShlImm;
-  if (!isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm))
+  SDValue ShlOp0;
+  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
+    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
+    ShlOp0 = AndOp0.getOperand(0);
+  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
+             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
+                                   ShlImm)) {
+    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
+
+    // ShlVal == shl(val, N), which is a left shift on a smaller type.
+    SDValue ShlVal = AndOp0.getOperand(0);
+
+    // Since this runs after type legalization and ShlVal is any-extended to
+    // MVT::i64, ShlVal itself is expected to have type MVT::i32.
+    assert((ShlVal.getValueType() == MVT::i32) &&
+           "Expect the shl source type to be MVT::i32.");
+
+    // Widen 'val' to MVT::i64 as the source of the bit-field positioning.
+    ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
+  } else
     return false;
 
-  // Bail out if the SHL has more than one use, since then we'll end up
-  // generating SHL+UBFIZ instead of just keeping SHL+AND.
+  // For !BiggerPattern, bail out if AndOp0 has more than one use, since we
+  // would then end up generating AndOp0 plus a UBFIZ instead of just keeping
+  // AndOp0+AND.
   if (!BiggerPattern && !AndOp0.hasOneUse())
     return false;
 
   DstLSB = countTrailingZeros(NonZeroBits);
   Width = countTrailingOnes(NonZeroBits >> DstLSB);
 
+  // Bail out on large Width. This happens when no proper combining / constant
+  // folding was performed.
+  if (Width >= (int)VT.getSizeInBits()) {
+    // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t,
+    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
+    // to "val".
+    // If VT is i32, Width >= 32 means:
+    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` op
+    //   demands at least 'Width' bits (after dag-combining). Together with the
+    //   `any_extend` op (whose higher bits are undefined), this indicates a
+    //   missed combine when lowering the 'and' IR instruction to machine IR.
+    LLVM_DEBUG(
+        dbgs()
+        << "Found large Width in bit-field-positioning -- this indicates no "
+           "proper combining / constant folding was performed\n");
+    return false;
+  }
+
   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
@@ -2599,7 +2636,7 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
     return false;
 
-  Src = getLeftShift(CurDAG, AndOp0.getOperand(0), ShlImm - DstLSB);
+  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
   return true;
 }
 

diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 5cc714306b0a3..a27e293ffe881 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -580,9 +580,8 @@ define <2 x i32> @test_complex_type(<2 x i32>* %addr, i64 %in, i64* %bf ) {
 define i64 @test_truncated_shift(i64 %x, i64 %y) {
 ; CHECK-LABEL: test_truncated_shift:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #25
-; CHECK-NEXT:    lsr x8, x8, #25
-; CHECK-NEXT:    bfi x0, x8, #25, #5
+; CHECK-NEXT:    // kill: def $w1 killed $w1 killed $x1 def $x1
+; CHECK-NEXT:    bfi x0, x1, #25, #5
 ; CHECK-NEXT:    ret
 entry:
   %and = and i64 %x, -1040187393
@@ -591,3 +590,14 @@ entry:
   %or = or i64 %and5, %and
   ret i64 %or
 }
+
+define i64 @test_and_extended_shift_with_imm(i64 %0) {
+; CHECK-LABEL: test_and_extended_shift_with_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
+; CHECK-NEXT:    ubfiz x0, x0, #7, #8
+; CHECK-NEXT:    ret
+  %2 = shl i64 %0, 7
+  %3 = and i64 %2, 32640  ; #0x7f80
+  ret i64 %3
+}

