[llvm] [AArch64] Change how we do bit computations for bfi (PR #142646)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 07:49:04 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/142646
From b10979eda2cc66d1bd08602eed8dc82c99e04632 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 24 May 2025 20:59:22 -0400
Subject: [PATCH 1/2] Test known zero with regressions
---
llvm/test/CodeGen/AArch64/bitfield-insert.ll | 29 ++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index eefb862c5313c..b778eb2d0ac58 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -753,3 +753,32 @@ entry:
store i16 %or, ptr %p
ret i16 %and1
}
+
+define i32 @test1_known_not_zero(i32 %a) {
+; CHECK-LABEL: test1_known_not_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #12288 // =0x3000
+; CHECK-NEXT: and w9, w0, #0xfffffffe
+; CHECK-NEXT: movk w8, #7, lsl #16
+; CHECK-NEXT: and w9, w9, #0xfff00fff
+; CHECK-NEXT: orr w0, w9, w8
+; CHECK-NEXT: ret
+ %and1 = and i32 %a, -1044482
+ %or = or disjoint i32 %and1, 471040
+ ret i32 %or
+}
+
+define i32 @test1_known_not_zero_mul(i32 %a) {
+; CHECK-LABEL: test1_known_not_zero_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mul w9, w0, w0
+; CHECK-NEXT: mov w8, #12288 // =0x3000
+; CHECK-NEXT: movk w8, #7, lsl #16
+; CHECK-NEXT: and w9, w9, #0xfff03fff
+; CHECK-NEXT: orr w0, w9, w8
+; CHECK-NEXT: ret
+  %mul = mul i32 %a, %a ; Squaring always clears bit 1, so it is known zero.
+ %and = and i32 %mul, -1044483
+ %or = or disjoint i32 %and, 471040
+ ret i32 %or
+}
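For reference, the masks in test1_known_not_zero decode as follows. This is a quick compile-time sanity check, assuming i32 two's-complement arithmetic; it is editorial and not part of the patch:

#include <cstdint>

// -1044482 == 0xFFF00FFE: the AND clears bit 0 and bits 12..19.
static_assert(static_cast<uint32_t>(-1044482) == 0xFFF00FFEu, "");
// 471040 == 0x00073000: the OR then sets bits 12, 13, 16, 17 and 18.
static_assert(471040u == 0x00073000u, "");

Bit 0 is cleared but never set again, so the changed bits span [0, 20) while bits 1..11 inside that span stay unknown: no single BFI/BFXIL covers it, hence the mov/and/orr sequence in the CHECK lines above.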
From 03387c99785fd982fb2900bc1b28d541b3211c1c Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 25 May 2025 19:27:17 -0400
Subject: [PATCH 2/2] [AArch64] Change how we do bit computations for bfi
Instead of relying on known zeros, we can focus on what we do know about the final result and BFXIL that.
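In outline: take the known bits of the AND's first operand, apply the AND/OR constants to them, diff the known bits before and after, and emit a single BFI/BFXIL over the smallest contiguous span covering every changed bit, bailing out if any bit inside that span is still unknown.

Below is a minimal standalone sketch of that bit arithmetic for the 32-bit case. It uses plain uint32_t fields as stand-ins for LLVM's KnownBits, and findInsertSpan is a made-up name for illustration, not the actual hook:

#include <cstdint>
#include <cstdio>

// Stand-in for LLVM's KnownBits: a bit is known-one if set in One,
// known-zero if set in Zero, and unknown otherwise.
struct Known {
  uint32_t Zero = 0, One = 0;
};

// Model (x & MaskImm) | OrImm on top of what is already known about x, then
// find the smallest contiguous span covering every bit whose known state
// changed. Returns false if nothing changed (the patch instead folds the
// whole AND/OR away in that case) or if some bit inside the span is still
// unknown, meaning no single BFI/BFXIL reproduces the result.
static bool findInsertSpan(Known K, uint32_t MaskImm, uint32_t OrImm,
                           int &LSB, int &Width, uint32_t &BFIImm) {
  uint32_t OldOne = K.One, OldZero = K.Zero;
  K.Zero |= ~MaskImm; // AND with a constant grows the known zeros...
  K.One &= MaskImm;   // ...and shrinks the known ones.
  K.One |= OrImm;     // OR with a constant grows the known ones...
  K.Zero &= ~OrImm;   // ...and shrinks the known zeros.

  uint32_t Changed = (K.One ^ OldOne) | (K.Zero ^ OldZero);
  if (Changed == 0)
    return false;

  LSB = __builtin_ctz(Changed);
  int MSB = 32 - __builtin_clz(Changed);
  Width = MSB - LSB;
  uint32_t SpanMask = (Width == 32) ? ~0u : (((1u << Width) - 1u) << LSB);

  if (~(K.One | K.Zero) & SpanMask) // some bit in the run is unknown
    return false;
  BFIImm = (K.One & SpanMask) >> LSB;
  return true;
}

int main() {
  // Mirrors hole_test below: x*x always has bit 1 clear, so the hole the
  // AND punches at bit 1 changes nothing and no longer widens the span.
  Known K;
  K.Zero = 1u << 1;
  int LSB, Width;
  uint32_t Imm;
  if (!findInsertSpan(K, 0xFFC0FFFDu, 0x00F50000u, LSB, Width, Imm))
    return 1;
  // Expect span [16, 24) with insert immediate 0xF5.
  printf("span [%d, %d), insert imm 0x%X\n", LSB, LSB + Width, Imm);
  return 0;
}

The real hook keeps going after this point: the hunk below still checks whether the insert immediate is a logical immediate and how cheaply it can be materialized, which is why hole_test still compiles to an orr/and pair with a materialized mask rather than a bfi.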
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 54 ++++++++++++-------
llvm/test/CodeGen/AArch64/bitfield-insert.ll | 22 ++++----
2 files changed, 47 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 34f6db9374cb5..63ecba17e5028 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -3316,28 +3316,46 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
!isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
return false;
- // Compute the Known Zero for the AND as this allows us to catch more general
- // cases than just looking for AND with imm.
- KnownBits Known = CurDAG->computeKnownBits(And);
+  // In the event that computeKnownBits gives us more bits than we bargained
+  // for, causing the result to not be a shifted mask, we should just
+  // focus on MaskImm.
- // Non-zero in the sense that they're not provably zero, which is the key
- // point if we want to use this value.
- uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
+  // We can optimize any chain of bitwise operations that results in a
+  // contiguous run of bits being set, provided we know the end result
+  // will have those bits set.
- // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
- if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
- return false;
+ KnownBits Known = CurDAG->computeKnownBits(And.getOperand(0));
+ uint64_t OldOne = Known.One.getZExtValue();
+ uint64_t OldZero = Known.Zero.getZExtValue();
- // The bits being inserted must only set those bits that are known to be zero.
- if ((OrImm & NotKnownZero) != 0) {
- // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
- // currently handle this case.
- return false;
+ Known &= KnownBits::makeConstant(APInt(BitWidth, MaskImm));
+ Known |= KnownBits::makeConstant(APInt(BitWidth, OrImm));
+
+  // Now see whether any of the known bits changed.
+ uint64_t NewOneVal = Known.One.getZExtValue();
+ uint64_t NewZeroVal = Known.Zero.getZExtValue();
+
+ uint64_t ChangedBits = (NewOneVal ^ OldOne) | (NewZeroVal ^ OldZero);
+
+  // Find the smallest bitfield insert that encompasses all changed bits and
+  // any known bits between them.
+ if (ChangedBits == 0) {
+    // Nothing changed: just replace the node with the first AND operand.
+ CurDAG->ReplaceAllUsesWith(SDValue(N, 0), And->getOperand(0));
+ return true;
}
- // BFI/BFXIL dst, src, #lsb, #width.
- int LSB = llvm::countr_one(NotKnownZero);
- int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
+ // Find the span:
+ APInt ChangedBitsAPInt = APInt(BitWidth, ChangedBits);
+ int LSB = ChangedBitsAPInt.countr_zero();
+ int MSB = BitWidth - ChangedBitsAPInt.countl_zero();
+ int Width = MSB - LSB;
+ uint64_t SpanMask = ((1ULL << Width) - 1) << LSB;
+
+ // Bail if *any* bit in that run is still unknown:
+ uint64_t Unknown = ~(NewOneVal | NewZeroVal);
+ if ((Unknown & SpanMask) != 0)
+ return false;
// BFI/BFXIL is an alias of BFM, so translate to BFM operands.
unsigned ImmR = (BitWidth - LSB) % BitWidth;
@@ -3348,7 +3366,7 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
// ORR. A BFXIL will use the same constant as the original ORR, so the code
// should be no worse in this case.
bool IsBFI = LSB != 0;
- uint64_t BFIImm = OrImm >> LSB;
+ uint64_t BFIImm = (NewOneVal & SpanMask) >> LSB;
if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
// We have a BFI instruction and we know the constant can't be materialized
// with a ORR-immediate with the zero register.
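Decoding the constants used in main() above and in the updated test below (again, just a compile-time check, not part of the patch):

#include <cstdint>

// -4128771 == 0xFFC0FFFD: the AND clears bit 1 and bits 16..21.
static_assert(static_cast<uint32_t>(-4128771) == 0xFFC0FFFDu, "");
// 16056320 == 0x00F50000: the OR sets bits 16, 18 and 20..23.
static_assert(16056320u == 0x00F50000u, "");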
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index b778eb2d0ac58..556c7ecde792f 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -768,17 +768,17 @@ define i32 @test1_known_not_zero(i32 %a) {
ret i32 %or
}
-define i32 @test1_known_not_zero_mul(i32 %a) {
-; CHECK-LABEL: test1_known_not_zero_mul:
+define i32 @hole_test(i32 %a) {
+; CHECK-LABEL: hole_test:
; CHECK: // %bb.0:
; CHECK-NEXT: mul w9, w0, w0
-; CHECK-NEXT: mov w8, #12288 // =0x3000
-; CHECK-NEXT: movk w8, #7, lsl #16
-; CHECK-NEXT: and w9, w9, #0xfff03fff
-; CHECK-NEXT: orr w0, w9, w8
-; CHECK-NEXT: ret
-  %mul = mul i32 %a, %a ; Squaring always clears bit 1, so it is known zero.
- %and = and i32 %mul, -1044483
- %or = or disjoint i32 %and, 471040
- ret i32 %or
+; CHECK-NEXT: mov w8, #65533 // =0xfffd
+; CHECK-NEXT: movk w8, #65525, lsl #16
+; CHECK-NEXT: orr w9, w9, #0xff0000
+; CHECK-NEXT: and w0, w9, w8
+; CHECK-NEXT: ret
+ %2 = mul i32 %a, %a
+ %3 = and i32 %2, -4128771
+ %4 = or i32 %3, 16056320
+ ret i32 %4
}
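As a footnote on the "BFI/BFXIL is an alias of BFM" translation in the hunk above: for a hypothetical insert at LSB 12 of width 8 into a 32-bit value, the BFM operands work out as below. The ImmS = Width - 1 line follows surrounding code the hunk does not show, so treat this as a sketch of the arithmetic rather than the patch itself:

#include <cassert>

int main() {
  const unsigned BitWidth = 32, LSB = 12, Width = 8;
  unsigned ImmR = (BitWidth - LSB) % BitWidth; // right-rotate amount
  unsigned ImmS = Width - 1;                   // index of the top bit copied
  assert(ImmR == 20 && ImmS == 7);             // i.e. bfi w0, w1, #12, #8
  return 0;
}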