[llvm] r225165 - Select lower sub,abs pattern to sabd on AArch64

Mon Jan 5 22:56:20 PST 2015

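I’m pretty sure these two patterns aren’t equivalent when the original subtraction can overflow. For example, with i8 elements 127 and -128, the sub wraps to -1 and abs then yields 1, whereas sabd computes the difference without wrapping and yields 255 (0xFF).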

—Owen

> On Jan 5, 2015, at 7:11 AM, Karthik Bhat <kv.bhat at samsung.com> wrote:
> 
> Author: karthik
> Date: Mon Jan  5 07:11:07 2015
> New Revision: 225165
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=225165&view=rev
> Log:
> Select lower sub,abs pattern to sabd on AArch64
> 
> This patch lowers patterns such as:
>  sub	v0.4s, v0.4s, v1.4s
>  abs	v0.4s, v0.4s
> to
>  sabd	v0.4s, v0.4s, v1.4s
> on AArch64.
> 
> Review: http://reviews.llvm.org/D6781
> 
> 
> Added:
>    llvm/trunk/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
> Modified:
>    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> 
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=225165&r1=225164&r2=225165&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Mon Jan  5 07:11:07 2015
> @@ -2733,6 +2733,33 @@ defm ORN : SIMDLogicalThreeVector<0, 0b1
>                                   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
> defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
> 
> +// SABD Vd.<T>, Vn.<T>, Vm.<T> Subtracts the elements of Vm from the corresponding
> +// elements of Vn, and places the absolute values of the results in the elements of Vd.
> +def : Pat<(xor (v8i8 (AArch64vashr (v8i8(sub V64:$Rn, V64:$Rm)), (i32 7))),
> +               (v8i8 (add (v8i8(sub V64:$Rn, V64:$Rm)),
> +               (AArch64vashr (v8i8(sub V64:$Rn, V64:$Rm)), (i32 7))))),
> +          (SABDv8i8 V64:$Rn, V64:$Rm)>;
> +def : Pat<(xor (v4i16 (AArch64vashr (v4i16(sub V64:$Rn, V64:$Rm)), (i32 15))),
> +               (v4i16 (add (v4i16(sub V64:$Rn, V64:$Rm)),
> +               (AArch64vashr (v4i16(sub V64:$Rn, V64:$Rm)), (i32 15))))),
> +          (SABDv4i16 V64:$Rn, V64:$Rm)>;
> +def : Pat<(xor (v2i32 (AArch64vashr (v2i32(sub V64:$Rn, V64:$Rm)), (i32 31))),
> +               (v2i32 (add (v2i32(sub V64:$Rn, V64:$Rm)),
> +               (AArch64vashr (v2i32(sub V64:$Rn, V64:$Rm)), (i32 31))))),
> +          (SABDv2i32 V64:$Rn, V64:$Rm)>;
> +def : Pat<(xor (v16i8 (AArch64vashr (v16i8(sub V128:$Rn, V128:$Rm)), (i32 7))),
> +               (v16i8 (add (v16i8(sub V128:$Rn, V128:$Rm)),
> +               (AArch64vashr (v16i8(sub V128:$Rn, V128:$Rm)), (i32 7))))),
> +          (SABDv16i8 V128:$Rn, V128:$Rm)>;
> +def : Pat<(xor (v8i16 (AArch64vashr (v8i16(sub V128:$Rn, V128:$Rm)), (i32 15))),
> +               (v8i16 (add (v8i16(sub V128:$Rn, V128:$Rm)),
> +               (AArch64vashr (v8i16(sub V128:$Rn, V128:$Rm)), (i32 15))))),
> +          (SABDv8i16 V128:$Rn, V128:$Rm)>;
> +def : Pat<(xor (v4i32 (AArch64vashr (v4i32(sub V128:$Rn, V128:$Rm)), (i32 31))),
> +               (v4i32 (add (v4i32(sub V128:$Rn, V128:$Rm)),
> +               (AArch64vashr (v4i32(sub V128:$Rn, V128:$Rm)), (i32 31))))),
> +          (SABDv4i32 V128:$Rn, V128:$Rm)>;
> +
> def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
>           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
> def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
> 
> Added: llvm/trunk/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll?rev=225165&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll (added)
> +++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-simd-vabs.ll Mon Jan  5 07:11:07 2015
> @@ -0,0 +1,101 @@
> +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
> +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
> +target triple = "aarch64--linux-gnu"
> +
> +; CHECK: testv4i32
> +; CHECK: sabd	v0.4s, v0.4s, v1.4s
> +define void @testv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
> +  %1 = bitcast i32* %b to <4 x i32>*
> +  %2 = load <4 x i32>* %1, align 4
> +  %3 = bitcast i32* %c to <4 x i32>*
> +  %4 = load <4 x i32>* %3, align 4
> +  %5 = sub nsw <4 x i32> %2, %4
> +  %6 = icmp sgt <4 x i32> %5, <i32 -1, i32 -1, i32 -1, i32 -1>
> +  %7 = sub <4 x i32> zeroinitializer, %5
> +  %8 = select <4 x i1> %6, <4 x i32> %5, <4 x i32> %7
> +  %9 = bitcast i32* %a to <4 x i32>*
> +  store <4 x i32> %8, <4 x i32>* %9, align 4
> +  ret void
> +}
> +
> +; CHECK: testv2i32
> +; CHECK: sabd	v0.2s, v0.2s, v1.2s
> +define void @testv2i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
> +  %1 = bitcast i32* %b to <2 x i32>*
> +  %2 = load <2 x i32>* %1, align 4
> +  %3 = bitcast i32* %c to <2 x i32>*
> +  %4 = load <2 x i32>* %3, align 4
> +  %5 = sub nsw <2 x i32> %2, %4
> +  %6 = icmp sgt <2 x i32> %5, <i32 -1, i32 -1>
> +  %7 = sub <2 x i32> zeroinitializer, %5
> +  %8 = select <2 x i1> %6, <2 x i32> %5, <2 x i32> %7
> +  %9 = bitcast i32* %a to <2 x i32>*
> +  store <2 x i32> %8, <2 x i32>* %9, align 4
> +  ret void
> +}
> +
> +; CHECK: testv8i16
> +; CHECK: sabd	v0.8h, v0.8h, v1.8h
> +define void @testv8i16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c){
> +  %1 = bitcast i16* %b to <8 x i16>*
> +  %2 = load <8 x i16>* %1, align 4
> +  %3 = bitcast i16* %c to <8 x i16>*
> +  %4 = load <8 x i16>* %3, align 4
> +  %5 = sub nsw <8 x i16> %2, %4
> +  %6 = icmp sgt <8 x i16> %5,  <i16 -1, i16 -1,i16 -1, i16 -1,i16 -1, i16 -1,i16 -1, i16 -1>
> +  %7 = sub <8 x i16> zeroinitializer, %5
> +  %8 = select <8 x i1> %6, <8 x i16> %5, <8 x i16> %7
> +  %9 = bitcast i16* %a to <8 x i16>*
> +  store <8 x i16> %8, <8 x i16>* %9, align 4
> +  ret void
> +}
> +
> +; CHECK: testv4i16
> +; CHECK: sabd	v0.4h, v0.4h, v1.4h
> +define void @testv4i16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c){
> +  %1 = bitcast i16* %b to <4 x i16>*
> +  %2 = load <4 x i16>* %1, align 4
> +  %3 = bitcast i16* %c to <4 x i16>*
> +  %4 = load <4 x i16>* %3, align 4
> +  %5 = sub nsw <4 x i16> %2, %4
> +  %6 = icmp sgt <4 x i16> %5,  <i16 -1, i16 -1,i16 -1, i16 -1>
> +  %7 = sub <4 x i16> zeroinitializer, %5
> +  %8 = select <4 x i1> %6, <4 x i16> %5, <4 x i16> %7
> +  %9 = bitcast i16* %a to <4 x i16>*
> +  store <4 x i16> %8, <4 x i16>* %9, align 4
> +  ret void
> +}
> +
> +
> +; CHECK: testv16i8
> +; CHECK: sabd	v0.16b, v0.16b, v1.16b
> +define void @testv16i8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c){
> +  %1 = bitcast i8* %b to <16 x i8>*
> +  %2 = load <16 x i8>* %1, align 4
> +  %3 = bitcast i8* %c to <16 x i8>*
> +  %4 = load <16 x i8>* %3, align 4
> +  %5 = sub nsw <16 x i8> %2, %4
> +  %6 = icmp sgt <16 x i8> %5,  <i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1>
> +  %7 = sub <16 x i8> zeroinitializer, %5
> +  %8 = select <16 x i1> %6, <16 x i8> %5, <16 x i8> %7
> +  %9 = bitcast i8* %a to <16 x i8>*
> +  store <16 x i8> %8, <16 x i8>* %9, align 4
> +  ret void
> +}
> +
> +; CHECK: testv8i8
> +; CHECK: sabd	v0.8b, v0.8b, v1.8b
> +define void @testv8i8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c){
> +  %1 = bitcast i8* %b to <8 x i8>*
> +  %2 = load <8 x i8>* %1, align 4
> +  %3 = bitcast i8* %c to <8 x i8>*
> +  %4 = load <8 x i8>* %3, align 4
> +  %5 = sub nsw <8 x i8> %2, %4
> +  %6 = icmp sgt <8 x i8> %5,  <i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1,i8 -1, i8 -1>
> +  %7 = sub <8 x i8> zeroinitializer, %5
> +  %8 = select <8 x i1> %6, <8 x i8> %5, <8 x i8> %7
> +  %9 = bitcast i8* %a to <8 x i8>*
> +  store <8 x i8> %8, <8 x i8>* %9, align 4
> +  ret void
> +}
> +
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits