[llvm-commits] [llvm] r155866 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineCalls.cpp test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll

Tue May 1 01:07:35 PDT 2012

Hi Lang,

On 01/05/12 02:20, Lang Hames wrote:
> Author: lhames
> Date: Mon Apr 30 19:20:38 2012
> New Revision: 155866
>
> URL: http://llvm.org/viewvc/llvm-project?rev=155866&view=rev
> Log:
> Add support for llvm.arm.neon.vmull* intrinsics to InstCombine. Fixes
> <rdar://problem/11291436>.
>
> This is a second attempt at a fix for this, the first was r155468. Thanks
> to Chandler, Bob and others for the feedback that helped me improve this.

are these intrinsics actually useful?  Couldn't you get the same effect using
generic IR, eg: first extend <4 x i16> to <4 x i32> then do the multiplication
in <4 x i32>; and have the ARM code generators recognize this pattern and create
the appropriate vmull instruction?

Ciao, Duncan.

>
> Added:
>      llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
> Modified:
>      llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
>
> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=155866&r1=155865&r2=155866&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Apr 30 19:20:38 2012
> @@ -694,6 +694,57 @@
>       break;
>     }
>
> +  case Intrinsic::arm_neon_vmulls:
> +  case Intrinsic::arm_neon_vmullu: {
> +    Value *Arg0 = II->getArgOperand(0);
> +    Value *Arg1 = II->getArgOperand(1);
> +
> +    // Handle mul by zero first:
> +    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
> +      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
> +    }
> +
> +    // Check for constant LHS&  RHS - in this case we just simplify.
> +    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
> +    VectorType *NewVT = cast<VectorType>(II->getType());
> +    unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
> +    if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
> +      if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
> +        VectorType* VT = cast<VectorType>(CV0->getType());
> +        SmallVector<Constant*, 4>  NewElems;
> +        for (unsigned i = 0; i<  VT->getNumElements(); ++i) {
> +          APInt CV0E =
> +            (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
> +          CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
> +          APInt CV1E =
> +            (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
> +          CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
> +          NewElems.push_back(
> +            ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
> +        }
> +        return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
> +      }
> +
> +      // Couldn't simplify - cannonicalize constant to the RHS.
> +      std::swap(Arg0, Arg1);
> +    }
> +
> +    // Handle mul by one:
> +    if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
> +      if (ConstantInt *Splat =
> +            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
> +        if (Splat->isOne()) {
> +          if (Zext)
> +            return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
> +          // else
> +          return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
> +        }
> +      }
> +    }
> +
> +    break;
> +  }
> +
>     case Intrinsic::stackrestore: {
>       // If the save is right next to the restore, remove the restore.  This can
>       // happen when variable allocas are DCE'd.
>
> Added: llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll?rev=155866&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll (added)
> +++ llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll Mon Apr 30 19:20:38 2012
> @@ -0,0 +1,68 @@
> +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
> +target triple = "thumbv7-apple-ios0"
> +
> +; RUN: opt -S -instcombine<  %s | FileCheck %s
> +
> +define<4 x i32>  @mulByZero(<4 x i16>  %x) nounwind readnone ssp {
> +entry:
> +  %a = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  %x,<4 x i16>  zeroinitializer) nounwind
> +  ret<4 x i32>  %a
> +; CHECK: entry:
> +; CHECK-NEXT: ret<4 x i32>  zeroinitializer
> +}
> +
> +define<4 x i32>  @mulByOne(<4 x i16>  %x) nounwind readnone ssp {
> +entry:
> +  %a = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  %x,<4 x i16>  <i16 1, i16 1, i16 1, i16 1>) nounwind
> +  ret<4 x i32>  %a
> +; CHECK: entry:
> +; CHECK-NEXT: %a = sext<4 x i16>  %x to<4 x i32>
> +; CHECK-NEXT: ret<4 x i32>  %a
> +}
> +
> +define<4 x i32>  @constantMul() nounwind readnone ssp {
> +entry:
> +  %a = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  <i16 3, i16 3, i16 3, i16 3>,<4 x i16>  <i16 2, i16 2, i16 2, i16 2>) nounwind
> +  ret<4 x i32>  %a
> +; CHECK: entry:
> +; CHECK-NEXT: ret<4 x i32>  <i32 6, i32 6, i32 6, i32 6>
> +}
> +
> +define<4 x i32>  @constantMulS() nounwind readnone ssp {
> +entry:
> +  %b = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  <i16 -1, i16 -1, i16 -1, i16 -1>,<4 x i16>  <i16 1, i16 1, i16 1, i16 1>) nounwind
> +  ret<4 x i32>  %b
> +; CHECK: entry:
> +; CHECK-NEXT: ret<4 x i32>  <i32 -1, i32 -1, i32 -1, i32 -1>
> +}
> +
> +define<4 x i32>  @constantMulU() nounwind readnone ssp {
> +entry:
> +  %b = tail call<4 x i32>  @llvm.arm.neon.vmullu.v4i32(<4 x i16>  <i16 -1, i16 -1, i16 -1, i16 -1>,<4 x i16>  <i16 1, i16 1, i16 1, i16 1>) nounwind
> +  ret<4 x i32>  %b
> +; CHECK: entry:
> +; CHECK-NEXT: ret<4 x i32>  <i32 65535, i32 65535, i32 65535, i32 65535>
> +}
> +
> +define<4 x i32>  @complex1(<4 x i16>  %x) nounwind readnone ssp {
> +entry:
> +  %a = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  <i16 2, i16 2, i16 2, i16 2>,<4 x i16>  %x) nounwind
> +  %b = add<4 x i32>  zeroinitializer, %a
> +  ret<4 x i32>  %b
> +; CHECK: entry:
> +; CHECK-NEXT: %a = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  <i16 2, i16 2, i16 2, i16 2>,<4 x i16>  %x) nounwind
> +; CHECK-NEXT: ret<4 x i32>  %a
> +}
> +
> +define<4 x i32>  @complex2(<4 x i32>  %x) nounwind readnone ssp {
> +entry:
> +  %a = tail call<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>  <i16 3, i16 3, i16 3, i16 3>,<4 x i16>  <i16 2, i16 2, i16 2, i16 2>) nounwind
> +  %b = add<4 x i32>  %x, %a
> +  ret<4 x i32>  %b
> +; CHECK: entry:
> +; CHECK-NEXT: %b = add<4 x i32>  %x,<i32 6, i32 6, i32 6, i32 6>
> +; CHECK-NEXT: ret<4 x i32>  %b
> +}
> +
> +declare<4 x i32>  @llvm.arm.neon.vmulls.v4i32(<4 x i16>,<4 x i16>) nounwind readnone
> +declare<4 x i32>  @llvm.arm.neon.vmullu.v4i32(<4 x i16>,<4 x i16>) nounwind readnone
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits