[PATCH] D11089: [NVPTX] declare no vector registers

Thu Jul 9 15:57:23 PDT 2015

Justin, I wonder why NVPTX doesn't leverage vector instructions (such as
vadd) at all. llc on

fadd <2 x float> %a, %b

gives me two add.f32 instructions instead of a vadd.f32 or the like.

Jingyue

On Thu, Jul 9, 2015 at 3:51 PM, Jingyue Wu <jingyue at google.com> wrote:

> jingyue created this revision.
> jingyue added a reviewer: jholewinski.
> jingyue added a subscriber: llvm-commits.
> Herald added a subscriber: jholewinski.
>
> Without this patch, the LoopVectorizer in certain cases (see
> loop-vectorize.ll) produces code with complex control flow, which hurts
> later optimizations. Since NVPTX doesn't have vector registers in LLVM's
> sense (NVPTXTTI::getRegisterBitWidth(true) == 32), for now we declare no
> vector registers to effectively disable loop vectorization.
>
> http://reviews.llvm.org/D11089
>
> Files:
>   lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
>   lib/Target/NVPTX/NVPTXTargetTransformInfo.h
>   test/CodeGen/NVPTX/loop-vectorize.ll
>
> Index: test/CodeGen/NVPTX/loop-vectorize.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/NVPTX/loop-vectorize.ll
> @@ -0,0 +1,39 @@
> +; RUN: opt < %s -O3 -S | FileCheck %s
> +
> +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
> +target triple = "nvptx64-nvidia-cuda"
> +
> +define void @no_vectorization(i32 %n, i32 %a, i32 %b) {
> +; CHECK-LABEL: no_vectorization(
> +; CHECK-NOT: <4 x i32>
> +; CHECK-NOT: <4 x i1>
> +entry:
> +  %cmp.5 = icmp sgt i32 %n, 0
> +  br i1 %cmp.5, label %for.body.preheader, label %for.cond.cleanup
> +
> +for.body.preheader:                               ; preds = %entry
> +  br label %for.body
> +
> +for.cond.cleanup.loopexit:                        ; preds = %for.body
> +  br label %for.cond.cleanup
> +
> +for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
> +  ret void
> +
> +for.body:                                         ; preds = %for.body.preheader, %for.body
> +  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
> +  %add = add nsw i32 %i.06, %a
> +  %mul = mul nsw i32 %add, %b
> +  %cmp1 = icmp sgt i32 %mul, -1
> +  tail call void @llvm.assume(i1 %cmp1)
> +  %inc = add nuw nsw i32 %i.06, 1
> +  %exitcond = icmp eq i32 %inc, %n
> +  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
> +}
> +
> +declare void @llvm.assume(i1) #0
> +
> +attributes #0 = { nounwind }
> +
> +!nvvm.annotations = !{!0}
> +!0 = !{void (i32, i32, i32)* @no_vectorization, !"kernel", i32 1}
> Index: lib/Target/NVPTX/NVPTXTargetTransformInfo.h
> ===================================================================
> --- lib/Target/NVPTX/NVPTXTargetTransformInfo.h
> +++ lib/Target/NVPTX/NVPTXTargetTransformInfo.h
> @@ -69,6 +69,8 @@
>        TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
>        TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
>        TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
> +
> +  unsigned getNumberOfRegisters(bool Vector);
>  };
>
>  } // end namespace llvm
> Index: lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
> ===================================================================
> --- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
> +++ lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
> @@ -117,3 +117,9 @@
>                                           Opd1PropInfo, Opd2PropInfo);
>    }
>  }
> +
> +unsigned NVPTXTTIImpl::getNumberOfRegisters(bool Vector) {
> +  if (Vector)
> +    return 0;
> +  return BaseT::getNumberOfRegisters(Vector);
> +}
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150709/bbf4ccc6/attachment.html>