[llvm] c252200 - [DAG][ARM][MIPS][RISCV] Improve funnel shift promotion to use 'double shift' patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 12 06:20:29 PDT 2020
Author: Simon Pilgrim
Date: 2020-10-12T14:11:02+01:00
New Revision: c252200e4de4411478fb525e93cc7ae05b189a22
URL: https://github.com/llvm/llvm-project/commit/c252200e4de4411478fb525e93cc7ae05b189a22
DIFF: https://github.com/llvm/llvm-project/commit/c252200e4de4411478fb525e93cc7ae05b189a22.diff
LOG: [DAG][ARM][MIPS][RISCV] Improve funnel shift promotion to use 'double shift' patterns
Based on a discussion on D88783, if we're promoting a funnel shift to a width at least twice that of the original type, then we can use the 'double shift' patterns (shifting the concatenated sources).
Differential Revision: https://reviews.llvm.org/D89139
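For context, here is a minimal scalar sketch of what the 'double shift' expansion computes for an i16 funnel shift promoted to i32. The helper names and the plain C++ integer arithmetic are illustrative assumptions only, not part of the patch or of any LLVM API:

#include <cstdint>

// fshl(x,y,z) -> (((x << 16) | y) << (z % 16)) >> 16 : result in the upper half.
static uint16_t fshl16_via_u32(uint16_t x, uint16_t y, uint16_t z) {
  uint32_t Concat = (uint32_t(x) << 16) | uint32_t(y); // hi:lo concatenation
  return uint16_t((Concat << (z % 16)) >> 16);
}

// fshr(x,y,z) -> ((x << 16) | y) >> (z % 16) : result in the lower half.
static uint16_t fshr16_via_u32(uint16_t x, uint16_t y, uint16_t z) {
  uint32_t Concat = (uint32_t(x) << 16) | uint32_t(y); // hi:lo concatenation
  return uint16_t(Concat >> (z % 16));
}

This is the shape visible in the ARM/MIPS/RISCV test diffs below: concatenate the sources, perform one variable shift, then a fixed right shift (fshl) or a truncate (fshr).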
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/test/CodeGen/ARM/funnel-shift.ll
llvm/test/CodeGen/Mips/funnel-shift.ll
llvm/test/CodeGen/RISCV/rv64Zbt.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0000fcb1dde1..8c2efe89d6f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1129,27 +1129,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Lo = GetPromotedInteger(N->getOperand(1));
SDValue Amount = GetPromotedInteger(N->getOperand(2));
- unsigned OldBits = N->getOperand(0).getScalarValueSizeInBits();
- unsigned NewBits = Hi.getScalarValueSizeInBits();
-
- // Shift Lo up to occupy the upper bits of the promoted type.
SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
EVT VT = Lo.getValueType();
- Lo = DAG.getNode(ISD::SHL, DL, VT, Lo,
- DAG.getConstant(NewBits - OldBits, DL, VT));
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
Amount =
DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
- unsigned Opcode = N->getOpcode();
- if (Opcode == ISD::FSHR) {
- // Increase Amount to shift the result into the lower bits of the promoted
- // type.
- Amount = DAG.getNode(ISD::ADD, DL, VT, Amount,
- DAG.getConstant(NewBits - OldBits, DL, VT));
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
+ Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
+ return Res;
}
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
+ Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
+
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
}
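As a rough scalar model of the fallback path kept above (used when the promoted type is not at least twice as wide, the amount is constant, or the target already handles the funnel shift natively): Lo is moved into the upper bits of the promoted type and, for FSHR, the amount is offset by NewBits - OldBits so the result lands in the low bits. The helper names below are hypothetical and the code is only a sketch of the arithmetic, not the DAG construction itself:

#include <cstdint>

// Reference 32-bit funnel shifts (amount taken modulo 32).
static uint32_t fshl32(uint32_t a, uint32_t b, uint32_t s) {
  s &= 31;
  return s ? (a << s) | (b >> (32 - s)) : a;
}
static uint32_t fshr32(uint32_t a, uint32_t b, uint32_t s) {
  s &= 31;
  return s ? (a << (32 - s)) | (b >> s) : b;
}

// i16 fshl promoted to i32: shift Lo into the upper half, keep the amount.
static uint16_t promoted_fshl16(uint16_t x, uint16_t y, uint16_t z) {
  return uint16_t(fshl32(x, uint32_t(y) << 16, z % 16));
}

// i16 fshr promoted to i32: shift Lo into the upper half and add 16 to the
// amount so the result ends up in the low 16 bits of the wide value.
static uint16_t promoted_fshr16(uint16_t x, uint16_t y, uint16_t z) {
  return uint16_t(fshr32(x, uint32_t(y) << 16, (z % 16) + 16));
}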
diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll
index a8b6aff767a7..54c93b493c98 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift.ll
@@ -19,13 +19,10 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: and r2, r2, #15
-; CHECK-NEXT: mov r3, #31
-; CHECK-NEXT: lsl r1, r1, #16
-; CHECK-NEXT: bic r3, r3, r2
-; CHECK-NEXT: lsl r0, r0, r2
-; CHECK-NEXT: lsr r1, r1, #1
-; CHECK-NEXT: orr r0, r0, r1, lsr r3
+; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
+; CHECK-NEXT: and r1, r2, #15
+; CHECK-NEXT: lsl r0, r0, r1
+; CHECK-NEXT: lsr r0, r0, #16
; CHECK-NEXT: bx lr
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
@@ -188,15 +185,9 @@ define i8 @fshl_i8_const_fold() {
define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: mov r3, #1
-; CHECK-NEXT: lsl r0, r0, #1
-; CHECK-NEXT: bfi r2, r3, #4, #28
-; CHECK-NEXT: mov r3, #31
-; CHECK-NEXT: bic r3, r3, r2
-; CHECK-NEXT: and r2, r2, #31
-; CHECK-NEXT: lsl r1, r1, #16
-; CHECK-NEXT: lsl r0, r0, r3
-; CHECK-NEXT: orr r0, r0, r1, lsr r2
+; CHECK-NEXT: pkhbt r0, r1, r0, lsl #16
+; CHECK-NEXT: and r1, r2, #15
+; CHECK-NEXT: lsr r0, r0, r1
; CHECK-NEXT: bx lr
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll
index 47d3db18c003..99029b7b9410 100644
--- a/llvm/test/CodeGen/Mips/funnel-shift.ll
+++ b/llvm/test/CodeGen/Mips/funnel-shift.ll
@@ -19,15 +19,13 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshl_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi $1, $6, 15
-; CHECK-NEXT: sllv $2, $4, $1
-; CHECK-NEXT: sll $3, $5, 16
-; CHECK-NEXT: srl $3, $3, 1
-; CHECK-NEXT: not $1, $1
-; CHECK-NEXT: andi $1, $1, 31
-; CHECK-NEXT: srlv $1, $3, $1
+; CHECK-NEXT: andi $1, $5, 65535
+; CHECK-NEXT: sll $2, $4, 16
+; CHECK-NEXT: or $1, $2, $1
+; CHECK-NEXT: andi $2, $6, 15
+; CHECK-NEXT: sllv $1, $1, $2
; CHECK-NEXT: jr $ra
-; CHECK-NEXT: or $2, $2, $1
+; CHECK-NEXT: srl $2, $1, 16
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
}
@@ -288,15 +286,12 @@ define i8 @fshl_i8_const_fold() {
define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) {
; CHECK-LABEL: fshr_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: sll $1, $5, 16
+; CHECK-NEXT: andi $1, $5, 65535
+; CHECK-NEXT: sll $2, $4, 16
+; CHECK-NEXT: or $1, $2, $1
; CHECK-NEXT: andi $2, $6, 15
-; CHECK-NEXT: ori $3, $2, 16
-; CHECK-NEXT: srlv $1, $1, $3
-; CHECK-NEXT: sll $3, $4, 1
-; CHECK-NEXT: xori $2, $2, 15
-; CHECK-NEXT: sllv $2, $3, $2
; CHECK-NEXT: jr $ra
-; CHECK-NEXT: or $2, $2, $1
+; CHECK-NEXT: srlv $2, $1, $2
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
ret i16 %f
}
diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
index 3e6201bac967..dc736ade8071 100644
--- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
@@ -109,14 +109,13 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
; RV64I-LABEL: fshl_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a2, a2, 31
-; RV64I-NEXT: sll a0, a0, a2
-; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: srli a1, a1, 1
-; RV64I-NEXT: srl a1, a1, a2
+; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: or a0, a0, a1
-; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: andi a1, a2, 31
+; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 32
; RV64I-NEXT: ret
;
; RV64IB-LABEL: fshl_i32:
@@ -162,14 +161,12 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nounwind {
; RV64I-LABEL: fshr_i32:
; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: andi a2, a2, 31
-; RV64I-NEXT: ori a3, a2, 32
-; RV64I-NEXT: srl a1, a1, a3
-; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: xori a2, a2, 31
-; RV64I-NEXT: sll a0, a0, a2
+; RV64I-NEXT: srli a1, a1, 32
; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: andi a1, a2, 31
+; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
;