[llvm] 0f9ef8b - [AArch64] Select BFI/BFXIL to ORR with shifted operand when one operand is the left or right shift of another operand
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 11 14:01:50 PST 2022
Author: Mingming Liu
Date: 2022-11-11T14:01:02-08:00
New Revision: 0f9ef8b18055c9f7ca534fab24f74266331ec3e5
URL: https://github.com/llvm/llvm-project/commit/0f9ef8b18055c9f7ca534fab24f74266331ec3e5
DIFF: https://github.com/llvm/llvm-project/commit/0f9ef8b18055c9f7ca534fab24f74266331ec3e5.diff
LOG: [AArch64] Select BFI/BFXIL to ORR with shifted operand when one operand is the left or right shift of another operand
Using the right-shift case [1] below as an example:
- Before this change, a bfxil instruction is generated (https://godbolt.org/z/EfzWMszPn)
- After this change, an orr with a right-shifted operand is generated instead (see the added test cases in `CodeGen/AArch64/bitfield-insert.ll`)
[1]
```
define i64 @test_orr_not_bfxil_i64(i64 %0) {
%2 = and i64 %0, 1044480 ; 0xff000
%3 = lshr i64 %2, 12
%4 = or i64 %2, %3
ret i64 %4
}
```
Differential Revision: https://reviews.llvm.org/D137689
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/test/CodeGen/AArch64/bitfield-insert.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7b7817650a29..ab9bbe124033 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2875,10 +2875,17 @@ static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
return false;
}
+// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
+// the operands and select it to AArch64::ORR with shifted registers if
+// that's more efficient. Returns true iff selection to AArch64::ORR happens.
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
const bool BiggerPattern) {
EVT VT = N->getValueType(0);
+ assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
+ assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
+ (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
+ "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
assert((VT == MVT::i32 || VT == MVT::i64) &&
"Expect result type to be i32 or i64 since N is combinable to BFM");
SDLoc DL(N);
@@ -2887,6 +2894,7 @@ static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
if (OrOpd1 != Dst)
return false;
+ const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
// For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
// nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
if (BiggerPattern) {
@@ -2903,7 +2911,6 @@ static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
uint64_t EncodedShiftImm;
if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
EncodedShiftImm)) {
- unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
SDValue Ops[] = {OrOpd0, ShiftedOperand,
CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
@@ -2915,16 +2922,58 @@ static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
assert((!BiggerPattern) && "BiggerPattern should be handled above");
+ SDValue Op;
uint64_t ShlImm;
- if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm) &&
- OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
- unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
- SDValue Ops[] = {
- Dst, Src,
- CurDAG->getTargetConstant(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
- CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
- return true;
+ if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
+ if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
+ SDValue Ops[] = {
+ Dst, Src,
+ CurDAG->getTargetConstant(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
+ CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
+ return true;
+ }
+
+ // Select the following pattern to left-shifted operand rather than BFI.
+ // %val1 = op ..
+ // %val2 = shl %val1, #imm
+ // %res = or %val1, %val2
+ //
+ // If N is selected to be BFI, we know that
+ // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
+ // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
+ //
+ // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
+ if (OrOpd0.getOperand(0) == OrOpd1) {
+ SDValue Ops[] = {
+ OrOpd1, OrOpd1,
+ CurDAG->getTargetConstant(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
+ CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
+ return true;
+ }
+ }
+
+ uint64_t SrlImm;
+ if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
+ // Select the following pattern to right-shifted operand rather than BFXIL.
+ // %val1 = op ..
+ // %val2 = lshr %val1, #imm
+ // %res = or %val1, %val2
+ //
+ // If N is selected to be BFXIL, we know that
+ // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
+ // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
+ //
+ // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
+ if (OrOpd0.getOperand(0) == OrOpd1) {
+ SDValue Ops[] = {
+ OrOpd1, OrOpd1,
+ CurDAG->getTargetConstant(
+ AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
+ CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
+ return true;
+ }
}
return false;
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 8a383e85a366..eeb1b544f57b 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -638,13 +638,12 @@ define i32 @test_orr_not_bfxil_i32(i32 %0) {
}
; For or operation, one operand is a left shift of another operand.
-; Use orr with left-shifted operand is better than bfi.
+; So orr with a left-shifted operand is generated (not bfi).
define i64 @test_orr_not_bfi_i64(i64 %0) {
; CHECK-LABEL: test_orr_not_bfi_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: and x8, x0, #0xff
-; CHECK-NEXT: bfi x8, x0, #8, #8
-; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: orr x0, x8, x8, lsl #8
; CHECK-NEXT: ret
%2 = and i64 %0, 255
%3 = shl i64 %2, 8
@@ -668,14 +667,13 @@ define i32 @test_bfi_not_orr_i32(i32 %0, i32 %1) {
ret i32 %or_res
}
-; orr is better than bfi, since both simplify away one instruction (%3)
+; orr is generated (not bfi), since both simplify away one instruction (%3)
; while orr has shorter latency and higher throughput.
define i32 @test_orr_not_bfi_i32(i32 %0) {
; CHECK-LABEL: test_orr_not_bfi_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: bfi w8, w0, #8, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: orr w0, w8, w8, lsl #8
; CHECK-NEXT: ret
%2 = and i32 %0, 255
%3 = shl i32 %2, 8
@@ -698,14 +696,13 @@ define i64 @test_bfxil_not_orr_i64(i64 %0, i64 %1) {
ret i64 %or_res
}
-; orr is better than bfxil, since one operand is the right shift of another
+; orr is generated (not bfxil), since one operand is the right shift of another
; operand.
define i64 @orr_not_bfxil_test2_i64(i64 %0) {
; CHECK-LABEL: orr_not_bfxil_test2_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: and x8, x0, #0xff000
-; CHECK-NEXT: bfxil x8, x0, #12, #8
-; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: orr x0, x8, x8, lsr #12
; CHECK-NEXT: ret
%2 = and i64 %0, 1044480 ; 0xff000
%3 = lshr i64 %2, 12
@@ -729,13 +726,12 @@ define i32 @test_bfxil_not_orr_i32(i32 %0, i32 %1) {
ret i32 %or_res
}
-; one operand is the shift of another operand, so orr is better.
+; one operand is the shift of another operand, so orr is generated (not bfxil).
define i32 @orr_not_bfxil_test2_i32(i32 %0) {
; CHECK-LABEL: orr_not_bfxil_test2_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff000
-; CHECK-NEXT: bfxil w8, w0, #12, #8
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: orr w0, w8, w8, lsr #12
; CHECK-NEXT: ret
%2 = and i32 %0, 1044480 ; 0xff000
%3 = lshr i32 %2, 12
More information about the llvm-commits
mailing list