[PATCH] D39510: [PPC] Use xxbrd to speed up bswap64
Guozhi Wei via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 14:38:39 PDT 2017
Carrot created this revision.
Power doesn't have bswap instructions, so llvm generates following code sequence for bswap64.
rotldi 5, 3, 16
rotldi 4, 3, 8
rotldi 9, 3, 24
rotldi 10, 3, 32
rotldi 11, 3, 48
rotldi 12, 3, 56
rldimi 4, 5, 8, 48
rldimi 4, 9, 16, 40
rldimi 4, 10, 24, 32
rldimi 4, 11, 40, 16
rldimi 4, 12, 48, 8
rldimi 4, 3, 56, 0
But Power9 has vector bswap instructions, they can also be used to speed up scalar bswap intrinsic. With this patch, bswap64 can be translated to:
mtvsrdd 34, 3, 3
xxbrd 34, 34
mfvsrld 3, 34
https://reviews.llvm.org/D39510
Files:
lib/Target/PowerPC/PPCISelLowering.cpp
lib/Target/PowerPC/PPCISelLowering.h
test/CodeGen/PowerPC/bswap64.ll
Index: test/CodeGen/PowerPC/bswap64.ll
===================================================================
--- test/CodeGen/PowerPC/bswap64.ll
+++ test/CodeGen/PowerPC/bswap64.ll
@@ -0,0 +1,13 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc64le-- -mcpu=pwr9 | FileCheck %s
+
+declare i64 @llvm.bswap.i64(i64)
+
+; CHECK: mtvsrdd
+; CHECK: xxbrd
+; CHECK: mfvsrld
+define i64 @bswap64(i64 %x) {
+entry:
+ %0 = call i64 @llvm.bswap.i64(i64 %x)
+ ret i64 %0
+}
+
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -953,6 +953,7 @@
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -289,14 +289,16 @@
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
- // PowerPC does not have BSWAP
+ // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
+ // to speed up scalar BSWAP64.
// CTPOP or CTTZ were introduced in P8/P9 respectivelly
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
} else {
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
}
@@ -8556,6 +8558,20 @@
return Op;
}
+// Lower scalar BSWAP64 to xxbrd.
+SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // MTVSRDD
+ Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
+ Op.getOperand(0));
+ // XXBRD
+ Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+ // MFVSRLD or MFVSRD
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+ DAG.getTargetConstant(0, dl, MVT::i32));
+ return Op;
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9027,6 +9043,8 @@
case ISD::SREM:
case ISD::UREM:
return LowerREM(Op, DAG);
+ case ISD::BSWAP:
+ return LowerBSWAP(Op, DAG);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D39510.121186.patch
Type: text/x-patch
Size: 2997 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171101/11af7d85/attachment.bin>
More information about the llvm-commits
mailing list