[llvm] 34d18fd - [AArch64] Enhance bit-field-positioning op matcher to see through 'any_extend' for pattern 'and(any_extend(shl(val, N)), shifted-mask)'
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 18 09:08:05 PDT 2022
Author: Mingming Liu
Date: 2022-10-18T09:07:14-07:00
New Revision: 34d18fd241abafdf0436cbceebdeff2ecf685ae2
URL: https://github.com/llvm/llvm-project/commit/34d18fd241abafdf0436cbceebdeff2ecf685ae2
DIFF: https://github.com/llvm/llvm-project/commit/34d18fd241abafdf0436cbceebdeff2ecf685ae2.diff
LOG: [AArch64] Enhance bit-field-positioning op matcher to see through 'any_extend' for pattern 'and(any_extend(shl(val, N)), shifted-mask)'
Before this patch (and the refactoring patch D135843), isBitfieldPositioningOp would not handle "and(any_extend(shl(val, N)), shifted-mask)" (it bailed out if the AND operand was not a SHL).
After this patch, isBitfieldPositioningOp sees through the "any_extend" to find the "shl" when looking for possible bit-field-positioning nodes.
https://gcc.godbolt.org/z/3ncGKbGW6 is a four-line piece of LLVM IR that can be optimized to UBFIZ (see the added test case test_and_extended_shift_with_imm in llvm/test/CodeGen/AArch64/bitfield-insert.ll). One existing test case also improves.
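For illustration only (not part of the commit), a C++ function along the following lines produces the same shl-plus-and IR as the added test case, and with this patch it should lower to the single UBFIZ shown in that test; the function name is hypothetical:

  #include <cstdint>

  // (x << 7) & 0x7f80 keeps 8 bits of x and positions them at bit 7, so the
  // matcher can now select "ubfiz x0, x0, #7, #8" instead of a shift plus an
  // AND.
  uint64_t shift_and_mask(uint64_t x) {
    return (x << 7) & 0x7f80; // 0x7f80 == 32640, an 8-bit mask shifted left by 7
  }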
Differential Revision: https://reviews.llvm.org/D135852
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/test/CodeGen/AArch64/bitfield-insert.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 629b2403470dc..844f9c0c7159a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2580,17 +2580,54 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
SDValue AndOp0 = Op.getOperand(0);
uint64_t ShlImm;
- if (!isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm))
+ SDValue ShlOp0;
+ if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
+ // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
+ ShlOp0 = AndOp0.getOperand(0);
+ } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
+ isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
+ ShlImm)) {
+ // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
+
+ // ShlVal == shl(val, N), which is a left shift on a smaller type.
+ SDValue ShlVal = AndOp0.getOperand(0);
+
+ // Since this is after type legalization and ShlVal is extended to MVT::i64,
+ // expect the value type of ShlVal to be MVT::i32.
+ assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
+
+ // Widens 'val' to MVT::i64 as the source of bit field positioning.
+ ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
+ } else
return false;
- // Bail out if the SHL has more than one use, since then we'll end up
- // generating SHL+UBFIZ instead of just keeping SHL+AND.
+ // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
+ // then we'll end up generating AndOp0+UBFIZ instead of just keeping
+ // AndOp0+AND.
if (!BiggerPattern && !AndOp0.hasOneUse())
return false;
DstLSB = countTrailingZeros(NonZeroBits);
Width = countTrailingOnes(NonZeroBits >> DstLSB);
+ // Bail out on large Width. This happens when no proper combining / constant
+ // folding was performed.
+ if (Width >= (int)VT.getSizeInBits()) {
+ // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
+ // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
+ // "val".
+ // If VT is i32, what Width >= 32 means:
+ // -  For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
+ //    demands at least 'Width' bits (after the dag-combiner). Together with
+ //    the `any_extend` Op (undefined higher bits), this indicates a missed
+ //    combine when lowering the 'and' IR instruction to a machine IR one.
+ LLVM_DEBUG(
+ dbgs()
+ << "Found large Width in bit-field-positioning -- this indicates no "
+ "proper combining / constant folding was performed\n");
+ return false;
+ }
+
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
// amount. BiggerPattern is true when this pattern is being matched for BFI,
@@ -2599,7 +2636,7 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
return false;
- Src = getLeftShift(CurDAG, AndOp0.getOperand(0), ShlImm - DstLSB);
+ Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 5cc714306b0a3..a27e293ffe881 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -580,9 +580,8 @@ define <2 x i32> @test_complex_type(<2 x i32>* %addr, i64 %in, i64* %bf ) {
define i64 @test_truncated_shift(i64 %x, i64 %y) {
; CHECK-LABEL: test_truncated_shift:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsl w8, w1, #25
-; CHECK-NEXT: lsr x8, x8, #25
-; CHECK-NEXT: bfi x0, x8, #25, #5
+; CHECK-NEXT: // kill: def $w1 killed $w1 killed $x1 def $x1
+; CHECK-NEXT: bfi x0, x1, #25, #5
; CHECK-NEXT: ret
entry:
%and = and i64 %x, -1040187393
@@ -591,3 +590,14 @@ entry:
%or = or i64 %and5, %and
ret i64 %or
}
+
+define i64 @test_and_extended_shift_with_imm(i64 %0) {
+; CHECK-LABEL: test_and_extended_shift_with_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 def $x0
+; CHECK-NEXT: ubfiz x0, x0, #7, #8
+; CHECK-NEXT: ret
+ %2 = shl i64 %0, 7
+ %3 = and i64 %2, 32640 ; #0x7f80
+ ret i64 %3
+}
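
As a worked example (illustrative only, not part of the commit), the UBFIZ operands checked above can be derived from the AND mask in the same way isBitfieldPositioningOpFromAnd does, using the trailing-zero and trailing-one counts:

  #include <bit>      // std::countr_zero / std::countr_one (C++20)
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t NonZeroBits = 0x7f80; // the AND mask from the test (32640)
    int DstLSB = std::countr_zero(NonZeroBits);          // 7
    int Width = std::countr_one(NonZeroBits >> DstLSB);  // 8
    // Prints "ubfiz x0, x0, #7, #8", matching the CHECK line in the new test.
    std::printf("ubfiz x0, x0, #%d, #%d\n", DstLSB, Width);
    return 0;
  }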