[llvm] 3416236 - [SDAG] match rotate pattern with extra 'or' operation
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 9 10:19:06 PST 2022
Author: Sanjay Patel
Date: 2022-03-09T13:19:00-05:00
New Revision: 341623653d891386b7943445981565ed1dff2a18
URL: https://github.com/llvm/llvm-project/commit/341623653d891386b7943445981565ed1dff2a18
DIFF: https://github.com/llvm/llvm-project/commit/341623653d891386b7943445981565ed1dff2a18.diff
LOG: [SDAG] match rotate pattern with extra 'or' operation
This is another fold generalized from D111530.
We can find a common source for a rotate operation hidden inside an 'or':
https://alive2.llvm.org/ce/z/9pV8hn
Deciding when this is profitable vs. a funnel-shift is tricky, but this
heuristic does not show any regressions: if a target has a rotate but does
not have a funnel-shift, then try to form the rotate here. That is why we
don't have x86 test diffs for the scalar tests that are duplicated from
AArch64 (74a65e3834d9487) - shld/shrd are available on x86. That also makes it
difficult to show vector diffs - the only case where I found a diff was
on x86 AVX512 or XOP with i64 elements.
There's an additional check for a legal type to avoid a problem seen
with x86-32 where we form a 64-bit rotate but then it gets split
inefficiently. We might avoid that by adding more rotate folds, but
I didn't check to see what is missing on that path.
This gets most of the motivating patterns for AArch64 / ARM that are in
D111530.
We still need a couple of enhancements to setcc pattern matching with
rotate/funnel-shift to get the rest.
Differential Revision: https://reviews.llvm.org/D120933
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
llvm/test/CodeGen/AArch64/logic-shift.ll
llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
llvm/test/CodeGen/ARM/icmp-shift-opt.ll
llvm/test/CodeGen/X86/rotate_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b76f4711322f5..a3e85cecf39b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7405,11 +7405,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
- // TODO: Support pre-legalization funnel-shift by constant.
- bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
- if (!IsRotate && !(HasFSHL || HasFSHR))
- return SDValue(); // Requires funnel shift support.
-
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
@@ -7423,15 +7418,57 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
+ auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
+ };
+
+ // TODO: Support pre-legalization funnel-shift by constant.
+ bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ if (!IsRotate && !(HasFSHL || HasFSHR)) {
+ if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ // Look for a disguised rotate by constant.
+ // The common shifted operand X may be hidden inside another 'or'.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
+ if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
+ return false;
+ if (CommonOp == Or.getOperand(0)) {
+ X = CommonOp;
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (CommonOp == Or.getOperand(1)) {
+ X = CommonOp;
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
+ if (matchOr(LHSShiftArg, RHSShiftArg)) {
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
+ }
+ // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
+ if (matchOr(RHSShiftArg, LHSShiftArg)) {
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
+ }
+ }
+
+ return SDValue(); // Requires funnel shift support.
+ }
+
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
// fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
// fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
// iff C1+C2 == EltSizeInBits
- auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
- };
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
index 6b4d898a41a87..368246dbb2426 100644
--- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
@@ -12,8 +12,7 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds x0, x0, #1
; CHECK-NEXT: adcs x1, x1, xzr
-; CHECK-NEXT: orr x8, x0, x1
-; CHECK-NEXT: extr x8, x1, x8, #60
+; CHECK-NEXT: orr x8, x1, x0, lsr #60
; CHECK-NEXT: cbnz x8, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -32,8 +31,7 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr x8, x0, x1
-; CHECK-NEXT: extr x8, x1, x8, #17
+; CHECK-NEXT: orr x8, x1, x0, lsr #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -45,8 +43,7 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr x8, x0, x1
-; CHECK-NEXT: extr x8, x1, x8, #17
+; CHECK-NEXT: orr x8, x1, x0, lsr #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -58,8 +55,7 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr x8, x1, x0
-; CHECK-NEXT: extr x8, x8, x0, #47
+; CHECK-NEXT: orr x8, x0, x1, lsl #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
@@ -71,8 +67,7 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr x8, x1, x0
-; CHECK-NEXT: extr x8, x8, x0, #47
+; CHECK-NEXT: orr x8, x0, x1, lsl #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
@@ -106,8 +101,7 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr x8, x0, x1
-; CHECK-NEXT: extr x8, x8, x1, #47
+; CHECK-NEXT: orr x8, x1, x0, lsl #17
; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
index 7889bda08a4f6..12c3e18317f88 100644
--- a/llvm/test/CodeGen/AArch64/logic-shift.ll
+++ b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -690,8 +690,8 @@ define i64 @mix_logic_shl(i64 %x0, i64 %x1, i64 %y, i64 %z) {
define i32 @or_fshl_commute0(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshl_commute0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w0, w1
-; CHECK-NEXT: extr w0, w8, w0, #27
+; CHECK-NEXT: ror w8, w0, #27
+; CHECK-NEXT: orr w0, w8, w1, lsl #5
; CHECK-NEXT: ret
%or1 = or i32 %x, %y
%sh1 = shl i32 %or1, 5
@@ -703,8 +703,8 @@ define i32 @or_fshl_commute0(i32 %x, i32 %y) {
define i64 @or_fshl_commute1(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshl_commute1:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w1, w0
-; CHECK-NEXT: extr x0, x8, x0, #29
+; CHECK-NEXT: ror x8, x0, #29
+; CHECK-NEXT: orr x0, x8, x1, lsl #35
; CHECK-NEXT: ret
%or1 = or i64 %y, %x
%sh1 = shl i64 %or1, 35
@@ -762,8 +762,8 @@ define i32 @or_fshl_wrong_shift(i32 %x, i32 %y) {
define i64 @or_fshr_commute0(i64 %x, i64 %y) {
; CHECK-LABEL: or_fshr_commute0:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr x8, x0, x1
-; CHECK-NEXT: extr x0, x0, x8, #24
+; CHECK-NEXT: ror x8, x0, #24
+; CHECK-NEXT: orr x0, x8, x1, lsr #24
; CHECK-NEXT: ret
%or1 = or i64 %x, %y
%sh1 = shl i64 %x, 40
@@ -775,8 +775,8 @@ define i64 @or_fshr_commute0(i64 %x, i64 %y) {
define i32 @or_fshr_commute1(i32 %x, i32 %y) {
; CHECK-LABEL: or_fshr_commute1:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr w8, w1, w0
-; CHECK-NEXT: extr w0, w0, w8, #29
+; CHECK-NEXT: ror w8, w0, #29
+; CHECK-NEXT: orr w0, w8, w1, lsr #29
; CHECK-NEXT: ret
%or1 = or i32 %y, %x
%sh1 = shl i32 %x, 3
diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
index 9675c32393daa..ef1d61dc66311 100644
--- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
+++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
@@ -630,14 +630,10 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7M-NEXT: ldrd lr, r0, [sp, #8]
; CHECKV7M-NEXT: beq .LBB6_2
; CHECKV7M-NEXT: @ %bb.1: @ %then
-; CHECKV7M-NEXT: orrs r2, r3
-; CHECKV7M-NEXT: lsrs r2, r2, #17
-; CHECKV7M-NEXT: orr.w r2, r2, r3, lsl #15
-; CHECKV7M-NEXT: orr.w r3, r12, r1
+; CHECKV7M-NEXT: orr.w r2, r3, r2, lsr #17
+; CHECKV7M-NEXT: orr.w r1, r1, r12, lsr #17
; CHECKV7M-NEXT: cmp r2, #0
; CHECKV7M-NEXT: mov r2, r0
-; CHECKV7M-NEXT: lsr.w r3, r3, #17
-; CHECKV7M-NEXT: orr.w r1, r3, r1, lsl #15
; CHECKV7M-NEXT: it ne
; CHECKV7M-NEXT: movne r2, lr
; CHECKV7M-NEXT: cmp r1, #0
@@ -646,9 +642,7 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7M-NEXT: add r0, r2
; CHECKV7M-NEXT: pop {r7, pc}
; CHECKV7M-NEXT: .LBB6_2: @ %else
-; CHECKV7M-NEXT: orr.w r1, r2, r3
-; CHECKV7M-NEXT: lsrs r1, r1, #17
-; CHECKV7M-NEXT: orr.w r1, r1, r3, lsl #15
+; CHECKV7M-NEXT: orr.w r1, r3, r2, lsr #17
; CHECKV7M-NEXT: cmp r1, #0
; CHECKV7M-NEXT: it ne
; CHECKV7M-NEXT: movne r0, lr
@@ -664,14 +658,10 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: lsls r4, r4, #31
; CHECKV7A-NEXT: beq .LBB6_2
; CHECKV7A-NEXT: @ %bb.1: @ %then
-; CHECKV7A-NEXT: orrs r2, r3
-; CHECKV7A-NEXT: lsrs r2, r2, #17
-; CHECKV7A-NEXT: orr.w r2, r2, r3, lsl #15
-; CHECKV7A-NEXT: orr.w r3, r12, r1
+; CHECKV7A-NEXT: orr.w r2, r3, r2, lsr #17
+; CHECKV7A-NEXT: orr.w r1, r1, r12, lsr #17
; CHECKV7A-NEXT: cmp r2, #0
; CHECKV7A-NEXT: mov r2, r0
-; CHECKV7A-NEXT: lsr.w r3, r3, #17
-; CHECKV7A-NEXT: orr.w r1, r3, r1, lsl #15
; CHECKV7A-NEXT: it ne
; CHECKV7A-NEXT: movne r2, lr
; CHECKV7A-NEXT: cmp r1, #0
@@ -680,9 +670,7 @@ define i32 @icmp64_uge_m2(i64 %x, i64 %y, i32 %a, i32 %b, i1 %c) {
; CHECKV7A-NEXT: add r0, r2
; CHECKV7A-NEXT: pop {r4, pc}
; CHECKV7A-NEXT: .LBB6_2: @ %else
-; CHECKV7A-NEXT: orr.w r1, r2, r3
-; CHECKV7A-NEXT: lsrs r1, r1, #17
-; CHECKV7A-NEXT: orr.w r1, r1, r3, lsl #15
+; CHECKV7A-NEXT: orr.w r1, r3, r2, lsr #17
; CHECKV7A-NEXT: cmp r1, #0
; CHECKV7A-NEXT: it ne
; CHECKV7A-NEXT: movne r0, lr
diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
index 492d6477c735f..a78978f977f86 100644
--- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
@@ -12,9 +12,7 @@ define i64 @opt_setcc_lt_power_of_2(i64 %a) nounwind {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds r0, r0, #1
; CHECK-NEXT: adc r1, r1, #0
-; CHECK-NEXT: orr r2, r0, r1
-; CHECK-NEXT: uxth r3, r1
-; CHECK-NEXT: orr r2, r3, r2, lsr #16
+; CHECK-NEXT: orr r2, r1, r0, lsr #16
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %exit
@@ -34,9 +32,7 @@ exit:
define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_eq_zero:
; CHECK: @ %bb.0:
-; CHECK-NEXT: orr r0, r0, r1
-; CHECK-NEXT: lsr r0, r0, #17
-; CHECK-NEXT: orr r0, r0, r1, lsl #15
+; CHECK-NEXT: orr r0, r1, r0, lsr #17
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr
@@ -48,9 +44,7 @@ define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_srl_ne_zero:
; CHECK: @ %bb.0:
-; CHECK-NEXT: orr r0, r0, r1
-; CHECK-NEXT: lsr r0, r0, #17
-; CHECK-NEXT: orr r0, r0, r1, lsl #15
+; CHECK-NEXT: orr r0, r1, r0, lsr #17
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
@@ -62,9 +56,7 @@ define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_eq_zero:
; CHECK: @ %bb.0:
-; CHECK-NEXT: orr r1, r1, r0
-; CHECK-NEXT: lsl r1, r1, #17
-; CHECK-NEXT: orr r0, r1, r0, lsr #15
+; CHECK-NEXT: orr r0, r0, r1, lsl #17
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr
@@ -76,9 +68,7 @@ define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind {
; CHECK-LABEL: opt_setcc_shl_ne_zero:
; CHECK: @ %bb.0:
-; CHECK-NEXT: orr r1, r1, r0
-; CHECK-NEXT: lsl r1, r1, #17
-; CHECK-NEXT: orr r0, r1, r0, lsr #15
+; CHECK-NEXT: orr r0, r0, r1, lsl #17
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movwne r0, #1
; CHECK-NEXT: bx lr
@@ -113,9 +103,7 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i64 %a) nounwind {
define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
; CHECK: @ %bb.0:
-; CHECK-NEXT: orr r0, r0, r1
-; CHECK-NEXT: lsl r0, r0, #17
-; CHECK-NEXT: orr r0, r0, r1, lsr #15
+; CHECK-NEXT: orr r0, r1, r0, lsl #17
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: bx lr
diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll
index c08110fdda067..af7e24887328f 100644
--- a/llvm/test/CodeGen/X86/rotate_vec.ll
+++ b/llvm/test/CodeGen/X86/rotate_vec.ll
@@ -230,13 +230,19 @@ define <4 x i32> @or_fshl_v4i32(<4 x i32> %x, <4 x i32> %y) {
}
define <2 x i64> @or_fshr_v2i64(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: or_fshr_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm1
-; CHECK-NEXT: vpsllq $42, %xmm0, %xmm0
-; CHECK-NEXT: vpsrlq $22, %xmm1, %xmm1
-; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; XOP-LABEL: or_fshr_v2i64:
+; XOP: # %bb.0:
+; XOP-NEXT: vpsrlq $22, %xmm1, %xmm1
+; XOP-NEXT: vprotq $42, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: or_fshr_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrlq $22, %xmm1, %xmm1
+; AVX512-NEXT: vprolq $42, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%or1 = or <2 x i64> %x, %y
%sh1 = shl <2 x i64> %x, <i64 42, i64 42>
%sh2 = lshr <2 x i64> %or1, <i64 22, i64 22>
More information about the llvm-commits mailing list