[llvm] r270896 - [AArch64] Generate rev16/rev32 from bswap + srl when upper bits are known zero.
Chad Rosier via llvm-commits
llvm-commits at lists.llvm.org
Thu May 26 12:41:34 PDT 2016
Author: mcrosier
Date: Thu May 26 14:41:33 2016
New Revision: 270896
URL: http://llvm.org/viewvc/llvm-project?rev=270896&view=rev
Log:
[AArch64] Generate rev16/rev32 from bswap + srl when upper bits are known zero.
Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the high
16 bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32) to
(rotr (bswap i64 x), 32), if the high 32 bits of x are zero. The rotate form
is preferred because it matches the existing rev16/rev32 selection patterns.
test_rev_w_srl16:                    test_rev_w_srl16:
        and w8, w0, #0xffff                  and w8, w0, #0xffff
        rev w8, w8              --->         rev16 w0, w8
        lsr w0, w8, #16

test_rev_x_srl32:                    test_rev_x_srl32:
        rev x8, x8              --->         rev32 x0, x8
        lsr x0, x8, #32
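For context, here is a source-level reproducer (my own sketch, not part of
the commit; the function names are made up) that yields the same IR as the
new tests below when the bswap builtins lower to llvm.bswap:

    #include <cstdint>

    // The zero-extension from 16 bits guarantees the high 16 bits of the
    // bswap input are zero, so the srl can be rewritten as a rotr, which
    // selects rev16.
    uint32_t swap_low_halfword(uint16_t a) {
      return __builtin_bswap32(a) >> 16;
    }

    // Same idea for i64: the high 32 bits are known zero, so bswap + srl 32
    // becomes rotr 32 and selects rev32.
    uint64_t swap_low_word(uint32_t a) {
      return __builtin_bswap64(a) >> 32;
    }

Built for AArch64 at -O2, each function should now produce the "after"
sequence shown above.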
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-rev.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=270896&r1=270895&r2=270896&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu May 26 14:41:33 2016
@@ -475,7 +475,7 @@ AArch64TargetLowering::AArch64TargetLowe
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
-
+ setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -8001,6 +8001,34 @@ static SDValue performORCombine(SDNode *
return SDValue();
}
+static SDValue performSRLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
+ // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
+ // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::BSWAP) {
+ SDLoc DL(N);
+ SDValue N1 = N->getOperand(1);
+ SDValue N00 = N0.getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+ if (VT == MVT::i32 && ShiftAmt == 16 &&
+ DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
+ if (VT == MVT::i64 && ShiftAmt == 32 &&
+ DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
+ return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
+ }
+ }
+ return SDValue();
+}
+
static SDValue performBitcastCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -9893,6 +9921,8 @@ SDValue AArch64TargetLowering::PerformDA
return performFDivCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
+ case ISD::SRL:
+ return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
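The shift and the rotate are interchangeable here because rotr differs from
srl only in the bits it wraps back into the high half, and those are the low
bits of the bswap result, i.e. the known-zero high bits of x. A
self-contained check of that equivalence for the i32 case (an illustrative
sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Plain C++ model of ISD::ROTR on an i32 value.
    static uint32_t rotr32(uint32_t x, unsigned n) {
      return (x >> n) | (x << (32 - n));
    }

    int main() {
      // For every x whose high 16 bits are zero, bswap(x) has zero low
      // 16 bits, so the bits rotr32 wraps back into the top are zero and
      // the logical shift and the rotate agree.
      for (uint32_t x = 0; x <= 0xffff; ++x) {
        uint32_t swapped = __builtin_bswap32(x);
        assert((swapped >> 16) == rotr32(swapped, 16));
      }
      return 0;
    }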
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-rev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-rev.ll?rev=270896&r1=270895&r2=270896&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-rev.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-rev.ll Thu May 26 14:41:33 2016
@@ -16,6 +16,33 @@ entry:
ret i64 %0
}
+; Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 16-bits
+; of %a are zero. This optimizes rev + lsr 16 to rev16.
+define i32 @test_rev_w_srl16(i16 %a) {
+entry:
+; CHECK-LABEL: test_rev_w_srl16:
+; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
+; CHECK: rev16 w0, [[REG]]
+; CHECK-NOT: lsr
+ %0 = zext i16 %a to i32
+ %1 = tail call i32 @llvm.bswap.i32(i32 %0)
+ %2 = lshr i32 %1, 16
+ ret i32 %2
+}
+
+; Canonicalize (srl (bswap x), 32) to (rotr (bswap x), 32) if the high 32-bits
+; of %a are zero. This optimizes rev + lsr 32 to rev32.
+define i64 @test_rev_x_srl32(i32 %a) {
+entry:
+; CHECK-LABEL: test_rev_x_srl32:
+; CHECK: rev32 x0, {{x[0-9]+}}
+; CHECK-NOT: lsr
+ %0 = zext i32 %a to i64
+ %1 = tail call i64 @llvm.bswap.i64(i64 %0)
+ %2 = lshr i64 %1, 32
+ ret i64 %2
+}
+
declare i32 @llvm.bswap.i32(i32) nounwind readnone
declare i64 @llvm.bswap.i64(i64) nounwind readnone
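To exercise the updated checks locally, the test can be run through lit from
a build directory (paths assumed; adjust to your tree):

    $ ./bin/llvm-lit -v ../llvm/test/CodeGen/AArch64/arm64-rev.ll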