[PATCH] [AArch64] Fix the bug PR21675 about lowering llvm.ctpop.i32
Hao Liu
Hao.Liu at arm.com
Sun Dec 21 22:42:29 PST 2014
Hi t.p.northover,
Hi Tim and other reviewers,
This patch fixes bug PR21675 (http://llvm.org/bugs/show_bug.cgi?id=21675), which is a bug in the lowering of llvm.ctpop.i32. Previously we used:
SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8)
But ctpop counts the set bits across the whole vector register, and using undef does not guarantee that the upper bits are all zero, so they can contribute to the count.
The generated code still contains some redundant instructions, such as:
fmov w0, s0
fmov d0, x0
But these should be optimized away elsewhere, not in this lowering.
This patch is by Wei-cheng Wang. Please review.
Thanks,
-Hao
http://reviews.llvm.org/D6753
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
test/CodeGen/AArch64/arm64-popcnt.ll
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3457,18 +3457,12 @@
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
- SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
- SDValue VecVal;
- if (VT == MVT::i32) {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
- VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
- VecVal);
- } else {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- }
+ if (VT == MVT::i32)
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
Index: test/CodeGen/AArch64/arm64-popcnt.ll
===================================================================
--- test/CodeGen/AArch64/arm64-popcnt.ll
+++ test/CodeGen/AArch64/arm64-popcnt.ll
@@ -4,7 +4,8 @@
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
-; CHECK: fmov s0, w0
+; CHECK: ubfx x{{[0-9]+}}
+; CHECK: fmov d0, x{{[0-9]+}}
; CHECK: cnt.8b v0, v0
; CHECK: uaddlv.8b h0, v0
; CHECK: fmov w0, s0
@@ -15,7 +16,24 @@
; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
; CHECK-NONEON: mul
+}
+define i32 @cnt32_advsimd_2(<2 x i32> %x) {
+ %1 = extractelement <2 x i32> %x, i64 0
+ %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
+ ret i32 %2
+; CHECK: fmov w0, s0
+; CHECK: fmov d0, x0
+; CHECK: cnt.8b v0, v0
+; CHECK: uaddlv.8b h0, v0
+; CHECK: fmov w0, s0
+; CHECK: ret
+; CHECK-NONEON-LABEL: cnt32_advsimd_2
+; CHECK-NONEON-NOT: 8b
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x55555555
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
+; CHECK-NONEON: mul
}
define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D6753.17544.patch
Type: text/x-patch
Size: 2346 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20141222/731c6373/attachment.bin>
More information about the llvm-commits
mailing list