[PATCH] D80271: [PowerPC] Exploit vabsd for some cases on P9
EsmeYi via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 19 23:25:42 PDT 2020
Esme created this revision.
Esme added reviewers: steven.zhang, jsji, nemanjai, PowerPC.
Herald added subscribers: llvm-commits, shchenz, kbarton, hiraditya.
Herald added a project: LLVM.
Exploit the vabsd* (vector absolute difference) instructions for some cases on Power9 (P9), for example:
void foo (char *restrict p, char *restrict q, char *restrict t)
{
for (int i = 0; i < 16; i++)
t[i] = abs (p[i] - q[i]);
}
This case should be matched to the hardware instruction vabsdub.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D80271
Files:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/vec_absd.ll
Index: llvm/test/CodeGen/PowerPC/vec_absd.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/vec_absd.ll
+++ llvm/test/CodeGen/PowerPC/vec_absd.ll
@@ -38,3 +38,45 @@
; CHECK: vabsduw 2, 2, 3
; CHECK: blr
}
+
+define <16 x i8> @test_vabsdub(<16 x i8> %0, <16 x i8> %1) {
+entry:
+ %2 = zext <16 x i8> %0 to <16 x i32>
+ %3 = zext <16 x i8> %1 to <16 x i32>
+ %4 = sub nsw <16 x i32> %2, %3
+ %5 = icmp slt <16 x i32> %4, zeroinitializer
+ %6 = sub nsw <16 x i32> zeroinitializer, %4
+ %7 = select <16 x i1> %5, <16 x i32> %6, <16 x i32> %4
+ %8 = trunc <16 x i32> %7 to <16 x i8>
+ ret <16 x i8> %8
+; CHECK-LABEL: @test_vabsdub
+; CHECK: vabsdub 2, 2, 3
+; CHECK: blr
+}
+
+define <8 x i16> @test_vabsduh(<8 x i16> %0, <8 x i16> %1) {
+entry:
+ %2 = zext <8 x i16> %0 to <8 x i32>
+ %3 = zext <8 x i16> %1 to <8 x i32>
+ %4 = sub nsw <8 x i32> %2, %3
+ %5 = icmp slt <8 x i32> %4, zeroinitializer
+ %6 = sub nsw <8 x i32> zeroinitializer, %4
+ %7 = select <8 x i1> %5, <8 x i32> %6, <8 x i32> %4
+ %8 = trunc <8 x i32> %7 to <8 x i16>
+ ret <8 x i16> %8
+; CHECK-LABEL: @test_vabsduh
+; CHECK: vabsduh 2, 2, 3
+; CHECK: blr
+}
+
+define <4 x i32> @test_vabsduw(<4 x i32> %0, <4 x i32> %1) {
+entry:
+ %2 = sub nsw <4 x i32> %0, %1
+ %3 = icmp slt <4 x i32> %2, zeroinitializer
+ %4 = sub nsw <4 x i32> zeroinitializer, %2
+ %5 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %2
+ ret <4 x i32> %5
+; CHECK-LABEL: @test_vabsduw
+; CHECK: vabsduw 2, 2, 3
+; CHECK: blr
+}
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16031,6 +16031,24 @@
SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
+ // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
+ if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+ return SDValue();
+ SDValue Sub = Op0.getOperand(0);
+ if (Sub.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = Sub.getOperand(0);
+ SDValue SubOp1 = Sub.getOperand(1);
+ if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
+ (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
+ return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
+ SubOp1.getOperand(0),
+ DCI.DAG.getTargetConstant(0, dl, MVT::i32));
+ }
+ }
+ }
+
// Looking for a truncate of i128 to i64.
if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
return SDValue();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80271.265142.patch
Type: text/x-patch
Size: 2744 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200520/1acb5cf6/attachment.bin>
More information about the llvm-commits
mailing list